diff --git a/tests/reasoning/test_exaone4_reasoning_parser.py b/tests/reasoning/test_exaone4_reasoning_parser.py
new file mode 100644
index 000000000000..8566c7835cc5
--- /dev/null
+++ b/tests/reasoning/test_exaone4_reasoning_parser.py
@@ -0,0 +1,340 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import pytest
+from transformers import AutoTokenizer
+
+from tests.reasoning.utils import run_reasoning_extraction
+from vllm.reasoning import ReasoningParser, ReasoningParserManager
+from vllm.entrypoints.openai.protocol import ChatCompletionRequest
+
+parser_name = "exaone4"
+start_token = "<think>"
+end_token = "</think>"
+
+REASONING_MODEL_NAME = "LGAI-EXAONE/EXAONE-4.0-1.2B"
+
+
+@pytest.fixture(scope="module")
+def exaone4_tokenizer():
+    return AutoTokenizer.from_pretrained(REASONING_MODEL_NAME)
+
+
+SIMPLE_REASONING = {
+    "output": "This is a reasoning section</think>This is the rest",
+    "reasoning_content": "This is a reasoning section",
+    "content": "This is the rest",
+    "is_reasoning_end": True,
+}
+COMPLETE_REASONING = {
+    "output": "This is a reasoning section</think>",
+    "reasoning_content": "This is a reasoning section",
+    "content": None,
+    "is_reasoning_end": True,
+}
+NO_REASONING = {
+    "output": "This is content",
+    "reasoning_content": None,
+    "content": "This is content",
+    "is_reasoning_end": False,
+    "skip_extract_content": True,
+}
+NO_REASONING_STREAMING = {
+    "output": "This is a normal section",
+    "reasoning_content": None,
+    "content": "This is a normal section",
+    "is_reasoning_end": False,
+    "skip_extract_content": True,
+}
+NO_REASONING_STREAMING_WITH_THINK = {
+    "output": "This is a normal section",
+    "reasoning_content": "This is a normal section",
+    "content": None,
+    "is_reasoning_end": False,
+}
+MULTIPLE_LINES = {
+    "output": "This\nThat</think>This is the rest\nThat",
+    "reasoning_content": "This\nThat",
+    "content": "This is the rest\nThat",
+    "is_reasoning_end": True,
+}
+SHORTEST_REASONING_NO_STREAMING = {
+    "output": "</think>This is the rest",
+    "reasoning_content": "",
+    "content": "This is the rest",
+    "is_reasoning_end": True,
+}
+SHORTEST_REASONING = {
+    "output": "</think>This is the rest",
+    "reasoning_content": None,
+    "content": "This is the rest",
+    "is_reasoning_end": True,
+}
+REASONING_WITH_THINK = {
+    "output": "<think>This is a reasoning section</think>This is the rest",
+    "reasoning_content": "This is a reasoning section",
+    "content": "This is the rest",
+    "is_reasoning_end": True,
+}
+COMPLETE_REASONING_WITH_THINK = {
+    "output": "<think>This is a reasoning section</think>",
+    "reasoning_content": "This is a reasoning section",
+    "content": None,
+    "is_reasoning_end": True,
+}
+MULTIPLE_LINES_WITH_THINK = {
+    "output": "<think>This\nThat</think>This is the rest\nThat",
+    "reasoning_content": "This\nThat",
+    "content": "This is the rest\nThat",
+    "is_reasoning_end": True,
+}
+SHORTEST_REASONING_NO_STREAMING_WITH_THINK = {
+    "output": "</think>This is the rest",
+    "reasoning_content": "",
+    "content": "This is the rest",
+    "is_reasoning_end": True,
+}
+SHORTEST_REASONING_WITH_THINK = {
+    "output": "</think>This is the rest",
+    "reasoning_content": None,
+    "content": "This is the rest",
+    "is_reasoning_end": True,
+}
+THINK_NO_END = {
+    "output": "<think>This is a reasoning section",
+    "reasoning_content": "This is a reasoning section",
+    "content": None,
+    "is_reasoning_end": False,
+}
+EMPTY = {
+    "output": "",
+    "reasoning_content": None,
+    "content": "",
+    "is_reasoning_end": False,
+}
+EMPTY_STREAMING = {
+    "output": "",
+    "reasoning_content": None,
+    "content": None,
+    "is_reasoning_end": False,
+}
+NEW_LINE = {
+    "output": "\n<think>This is a reasoning section</think>\nThis is the rest",
+    "reasoning_content": "This is a reasoning section",
+    "content": "\nThis is the rest",
+    "is_reasoning_end": True,
+}
+NEW_LINE_STREAMING = {
+    "output": "\n<think>This is a reasoning section</think>\nThis is the rest",
+    "reasoning_content": "\nThis is a reasoning section",
+    "content": "\nThis is the rest",
+    "is_reasoning_end": True,
+}
+
+TEST_CASES = [
+    pytest.param(
+        False,
+        SIMPLE_REASONING,
+        False,
+        id="simple_reasoning",
+    ),
+    pytest.param(
+        True,
+        SIMPLE_REASONING,
+        True,
+        id="simple_reasoning_streaming",
+    ),
+    pytest.param(
+        False,
+        COMPLETE_REASONING,
+        False,
+        id="complete_reasoning",
+    ),
+    pytest.param(
+        True,
+        COMPLETE_REASONING,
+        True,
+        id="complete_reasoning_streaming",
+    ),
+    pytest.param(
+        False,
+        NO_REASONING,
+        False,
+        id="no_reasoning_token",
+    ),
+    pytest.param(
+        True,
+        NO_REASONING_STREAMING,
+        False,
+        id="no_reasoning_token_streaming",
+    ),
+    pytest.param(
+        True,
+        NO_REASONING_STREAMING_WITH_THINK,
+        True,
+        id="no_reasoning_token_streaming_with_think",
+    ),
+    pytest.param(
+        False,
+        MULTIPLE_LINES,
+        False,
+        id="multiple_lines",
+    ),
+    pytest.param(
+        True,
+        MULTIPLE_LINES,
+        True,
+        id="multiple_lines_streaming",
+    ),
+    pytest.param(
+        True,
+        SHORTEST_REASONING,
+        False,
+        id="shortest",
+    ),
+    pytest.param(
+        False,
+        SHORTEST_REASONING_NO_STREAMING,
+        True,
+        id="shortest_streaming",
+    ),
+    pytest.param(
+        False,
+        REASONING_WITH_THINK,
+        False,
+        id="reasoning_with_think",
+    ),
+    pytest.param(
+        True,
+        REASONING_WITH_THINK,
+        True,
+        id="reasoning_with_think_streaming",
+    ),
+    pytest.param(
+        False,
+        COMPLETE_REASONING_WITH_THINK,
+        False,
+        id="complete_reasoning_with_think",
+    ),
+    pytest.param(
+        True,
+        COMPLETE_REASONING_WITH_THINK,
+        True,
+        id="complete_reasoning_with_think_streaming",
+    ),
+    pytest.param(
+        False,
+        MULTIPLE_LINES_WITH_THINK,
+        False,
+        id="multiple_lines_with_think",
+    ),
+    pytest.param(
+        True,
+        MULTIPLE_LINES_WITH_THINK,
+        True,
+        id="multiple_lines_with_think_streaming",
+    ),
+    pytest.param(
+        False,
+        SHORTEST_REASONING_NO_STREAMING_WITH_THINK,
+        False,
+        id="shortest_with_think",
+    ),
+    pytest.param(
+        True,
+        SHORTEST_REASONING_WITH_THINK,
+        True,
+        id="shortest_with_think_streaming",
+    ),
+    pytest.param(
+        False,
+        THINK_NO_END,
+        False,
+        id="think_no_end",
+    ),
+    pytest.param(
+        True,
+        THINK_NO_END,
+        True,
+        id="think_no_end_streaming",
+    ),
+    pytest.param(
+        False,
+        EMPTY,
+        False,
+        id="empty",
+    ),
+    pytest.param(
+        True,
+        EMPTY_STREAMING,
+        True,
+        id="empty_streaming",
+    ),
+    pytest.param(
+        False,
+        NEW_LINE,
+        False,
+        id="new_line",
+    ),
+    pytest.param(
+        True,
+        NEW_LINE_STREAMING,
+        True,
+        id="new_line_streaming",
+    ),
+]
+
+
+@pytest.mark.parametrize("streaming, param_dict, enable_thinking", TEST_CASES)
+def test_reasoning(
+    streaming: bool,
+    param_dict: dict,
+    enable_thinking: bool,
+    exaone4_tokenizer,
+):
+    output = exaone4_tokenizer.tokenize(param_dict["output"])
+    # decode everything to tokens
+    output_tokens: list[str] = [
+        exaone4_tokenizer.convert_tokens_to_string([token])
+        for token in output
+    ]
+    parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(
+        parser_name)(exaone4_tokenizer)
+
+    dummy_request = ChatCompletionRequest(
+        messages=[],
+        chat_template_kwargs={"enable_thinking": enable_thinking},
+    )
+    reasoning, content = run_reasoning_extraction(
+        parser,
+        output_tokens,
+        request=dummy_request if enable_thinking else None,
+        streaming=streaming)
+
+    assert reasoning == param_dict["reasoning_content"]
+    assert content == param_dict["content"]
+
+    # Test is_reasoning_end
+    output_ids = exaone4_tokenizer.convert_tokens_to_ids(output)
+    is_reasoning_end = parser.is_reasoning_end(output_ids)
+    assert is_reasoning_end == param_dict["is_reasoning_end"]
+
+    # Test extract_content
+
+    # NOTE: For the `no_reasoning_token` cases, we skip the extract_content
+    # test. By default, the EXAONE 4.0 parser treats the whole output as
+    # content if there is no '<think>' or '</think>' and
+    # `enable_thinking=False`. However, `extract_content_ids()` cannot read
+    # `enable_thinking` from the request, and it is only used for removing
+    # the reasoning content from the output in
+    # vllm.entrypoints.openai.serving_chat.py. So we leave
+    # `extract_content_ids()` as is (it assumes the output is reasoning
+    # content whenever there is no '<think>' or '</think>').
+    if param_dict.get("skip_extract_content", False):
+        return
+
+    if param_dict["content"] is not None:
+        content = parser.extract_content_ids(output_ids)
+        assert content == exaone4_tokenizer.convert_tokens_to_ids(
+            exaone4_tokenizer.tokenize(param_dict["content"]))
+    else:
+        content = parser.extract_content_ids(output_ids)
+        assert content == []
diff --git a/tests/reasoning/test_granite_reasoning_parser.py b/tests/reasoning/test_granite_reasoning_parser.py
index 38cab73a45f2..d94362f3c405 100644
--- a/tests/reasoning/test_granite_reasoning_parser.py
+++ b/tests/reasoning/test_granite_reasoning_parser.py
@@ -336,6 +336,7 @@ def test_streaming_subcases(param_dict):
         previous_token_ids=previous_token_ids,
         current_token_ids=current_token_ids,
         delta_token_ids=delta_token_ids,
+        request=None,
     )
     # Streaming currently expects at least one of reasoning content / content,
     # so the response should return None in that case.
diff --git a/tests/reasoning/utils.py b/tests/reasoning/utils.py
index 9af5fa5addbc..38d4d2eb8b58 100644
--- a/tests/reasoning/utils.py
+++ b/tests/reasoning/utils.py
@@ -115,6 +115,7 @@ def run_reasoning_extraction_streaming(
             previous_tokens,
             current_tokens,
             token_delta,
+            request,
         )
         if delta_message is not None:
             reconstructor.append_delta(delta_message)
@@ -147,6 +148,7 @@ def run_reasoning_extraction_streaming_mistral(
             previous_tokens,
             current_tokens,
             token_delta,
+            request,
         )
         if delta_message is not None:
             reconstructor.append_delta(delta_message)
diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py
index b4231c6d10c4..b1e1aadb9644 100644
--- a/vllm/entrypoints/openai/serving_chat.py
+++ b/vllm/entrypoints/openai/serving_chat.py
@@ -696,6 +696,7 @@ async def chat_completion_stream_generator(
                                 previous_token_ids,
                                 current_token_ids,
                                 output.token_ids,
+                                request,
                             ))
                         # When encountering think end id in delta_token_ids
                         # or think end id in prompt_token_ids
@@ -781,6 +782,7 @@ async def chat_completion_stream_generator(
                                 previous_token_ids,
                                 current_token_ids,
                                 output.token_ids,
+                                request,
                             ))
                         # When encountering think end id in prompt_token_ids
                         # i.e {"enable_thinking": False},
@@ -858,6 +860,7 @@ async def chat_completion_stream_generator(
                                 previous_token_ids,
                                 current_token_ids,
                                 output.token_ids,
+                                request,
                             ))
                         # handle streaming just a content delta
                         else:
diff --git a/vllm/reasoning/__init__.py b/vllm/reasoning/__init__.py
index b987adeb6428..98b3c4abc646 100644
--- a/vllm/reasoning/__init__.py
+++ b/vllm/reasoning/__init__.py
@@ -10,6 +10,7 @@
 from .mistral_reasoning_parser import MistralReasoningParser
 from .qwen3_reasoning_parser import Qwen3ReasoningParser
 from .step3_reasoning_parser import Step3ReasoningParser
+from .exaone4_reasoning_parser import Exaone4ReasoningParser
 
 __all__ = [
     "ReasoningParser",
@@ -22,4 +23,5 @@
     "MistralReasoningParser",
     "Step3ReasoningParser",
     "GptOssReasoningParser",
+    "Exaone4ReasoningParser",
 ]
diff --git a/vllm/reasoning/abs_reasoning_parsers.py b/vllm/reasoning/abs_reasoning_parsers.py
index 4f4522d726e8..5eede425f36e 100644
--- a/vllm/reasoning/abs_reasoning_parsers.py
+++ b/vllm/reasoning/abs_reasoning_parsers.py
@@ -105,6 +105,7 @@ def extract_reasoning_content_streaming(
         previous_token_ids: Sequence[int],
         current_token_ids: Sequence[int],
         delta_token_ids: Sequence[int],
+        request: ChatCompletionRequest,
     ) -> Union[DeltaMessage, None]:
         """
         Instance method that should be implemented for extracting reasoning
diff --git a/vllm/reasoning/deepseek_r1_reasoning_parser.py b/vllm/reasoning/deepseek_r1_reasoning_parser.py
index 1a5ca46a60f1..163eb8c6138f 100644
--- a/vllm/reasoning/deepseek_r1_reasoning_parser.py
+++ b/vllm/reasoning/deepseek_r1_reasoning_parser.py
@@ -64,6 +64,7 @@ def extract_reasoning_content_streaming(
         previous_token_ids: Sequence[int],
         current_token_ids: Sequence[int],
         delta_token_ids: Sequence[int],
+        request: ChatCompletionRequest,
     ) -> Union[DeltaMessage, None]:
         """
         Extract reasoning content from a delta message.
diff --git a/vllm/reasoning/exaone4_reasoning_parser.py b/vllm/reasoning/exaone4_reasoning_parser.py
new file mode 100644
index 000000000000..cf9a3094fa94
--- /dev/null
+++ b/vllm/reasoning/exaone4_reasoning_parser.py
@@ -0,0 +1,162 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from collections.abc import Sequence
+from typing import Optional, Union
+
+from transformers import PreTrainedTokenizerBase
+
+from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
+                                              DeltaMessage)
+from vllm.logger import init_logger
+from vllm.reasoning import ReasoningParserManager
+from vllm.reasoning.deepseek_r1_reasoning_parser import (
+    DeepSeekR1ReasoningParser)
+
+logger = init_logger(__name__)
+
+
+@ReasoningParserManager.register_module("exaone4")
+class Exaone4ReasoningParser(DeepSeekR1ReasoningParser):
+    """
+    Reasoning parser for the EXAONE 4.0 model.
+
+    The EXAONE 4.0 model uses <think>...</think> tokens to denote reasoning
+    text. This parser extracts the reasoning content from the model output.
+    """
+
+    def __init__(self, tokenizer: PreTrainedTokenizerBase):
+        super().__init__(tokenizer)
+
+        if not self.model_tokenizer:
+            raise ValueError(
+                "The model tokenizer must be passed to the ReasoningParser "
+                "constructor during construction.")
+
+        self.start_token_id = self.vocab.get(self.start_token)
+        self.end_token_id = self.vocab.get(self.end_token)
+        if self.start_token_id is None or self.end_token_id is None:
+            raise RuntimeError(
+                "EXAONE 4.0 reasoning parser could not locate think start/end "
+                "tokens in the tokenizer!")
+
+    def extract_reasoning_content_streaming(
+        self,
+        previous_text: str,
+        current_text: str,
+        delta_text: str,
+        previous_token_ids: Sequence[int],
+        current_token_ids: Sequence[int],
+        delta_token_ids: Sequence[int],
+        request: ChatCompletionRequest,
+    ) -> Union[DeltaMessage, None]:
+        """
+        Extract reasoning content from a delta message.
+        Handles streaming output where previous + delta = current.
+        Uses token IDs for faster processing.
+        For text <think>abc</think>xyz:
+        - 'abc' goes to reasoning_content
+        - 'xyz' goes to content
+        """
+        # Skip single special tokens
+        if len(delta_token_ids) == 1 and (delta_token_ids[0] in [
+                self.start_token_id, self.end_token_id
+        ]):
+            return None
+
+        # Check if <think> is present in previous or delta.
+        # Keep compatibility with models that don't generate <think> tokens.
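+        # The branches below key off where <think> and </think> have appeared
+        # so far (in previous_token_ids vs. this delta) and route the delta
+        # text to reasoning_content, content, or both accordingly.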
+        if self.start_token_id in previous_token_ids:
+            if self.end_token_id in delta_token_ids:
+                # <think> in previous, </think> in delta,
+                # extract reasoning content
+                end_index = delta_text.find(self.end_token)
+                reasoning_content = delta_text[:end_index]
+                content = delta_text[end_index + len(self.end_token):]
+                return DeltaMessage(
+                    reasoning_content=reasoning_content,
+                    content=content if content else None,
+                )
+            elif self.end_token_id in previous_token_ids:
+                # <think> in previous, </think> in previous,
+                # content continues
+                return DeltaMessage(content=delta_text)
+            else:
+                # <think> in previous, no </think> in previous or delta,
+                # reasoning content continues
+                return DeltaMessage(reasoning_content=delta_text)
+        elif self.start_token_id in delta_token_ids:
+            if self.end_token_id in delta_token_ids:
+                # <think> in delta, </think> in delta,
+                # extract reasoning content
+                start_index = delta_text.find(self.start_token)
+                end_index = delta_text.find(self.end_token)
+                reasoning_content = delta_text[start_index +
+                                               len(self.start_token):end_index]
+                content = delta_text[end_index + len(self.end_token):]
+                return DeltaMessage(
+                    reasoning_content=reasoning_content,
+                    content=content if content else None,
+                )
+            else:
+                # <think> in delta, no </think> in delta,
+                # reasoning content continues
+                return DeltaMessage(reasoning_content=delta_text)
+        else:
+            # No <think> in previous or delta, so also check for </think>,
+            # because the model may have generated </think> without <think>.
+            # Ref https://huggingface.co/LGAI-EXAONE/EXAONE-4.0-32B/blob/main/chat_template.jinja#L139-L146
+            if self.end_token_id in delta_token_ids:
+                # </think> in delta with more tokens,
+                # extract reasoning content and content
+                end_index = delta_text.find(self.end_token)
+                reasoning_content = delta_text[:end_index]
+                content = delta_text[end_index + len(self.end_token):]
+                return DeltaMessage(
+                    reasoning_content=reasoning_content,
+                    content=content if content else None,
+                )
+            elif self.end_token_id in previous_token_ids:
+                # </think> in previous, thinking content ends
+                return DeltaMessage(content=delta_text)
+            else:
+                # no </think> in previous or delta,
+                # use `enable_thinking` to determine if the model is thinking
+                if request is not None and \
+                        request.chat_template_kwargs is not None and \
+                        request.chat_template_kwargs.get("enable_thinking"):
+                    return DeltaMessage(reasoning_content=delta_text)
+                else:
+                    return DeltaMessage(content=delta_text)
+
+    def extract_reasoning_content(
+            self, model_output: str, request: ChatCompletionRequest
+    ) -> tuple[Optional[str], Optional[str]]:
+        """
+        Extract reasoning content from the model output.
+
+        For text <think>abc</think>xyz:
+        - 'abc' goes to reasoning_content
+        - 'xyz' goes to content
+
+        Returns:
+            tuple[Optional[str], Optional[str]]: reasoning content and content
+        """
+
+        # Check if the start token is present in the model output, remove it
+        # if it is present.
+        model_output_parts = model_output.partition(self.start_token)
+        model_output = model_output_parts[2] if model_output_parts[
+            1] else model_output_parts[0]
+
+        # EXAONE 4.0 doesn't generate <think> tokens.
+        # Ref https://huggingface.co/LGAI-EXAONE/EXAONE-4.0-32B/blob/main/chat_template.jinja#L139-L146
+        if self.end_token not in model_output:
+            if model_output_parts[1]:
+                return model_output, None
+            return None, model_output
+        else:
+            reasoning_content, _, content = model_output.partition(
+                self.end_token)
+
+            final_content = content or None
+            return reasoning_content, final_content
diff --git a/vllm/reasoning/glm4_moe_reasoning_parser.py b/vllm/reasoning/glm4_moe_reasoning_parser.py
index 460e38d2d396..76e566629054 100644
--- a/vllm/reasoning/glm4_moe_reasoning_parser.py
+++ b/vllm/reasoning/glm4_moe_reasoning_parser.py
@@ -64,6 +64,7 @@ def extract_reasoning_content_streaming(
         previous_token_ids: Sequence[int],
         current_token_ids: Sequence[int],
         delta_token_ids: Sequence[int],
+        request: ChatCompletionRequest,
     ) -> Union[DeltaMessage, None]:
         """
         Extract reasoning content from a delta message.
diff --git a/vllm/reasoning/gptoss_reasoning_parser.py b/vllm/reasoning/gptoss_reasoning_parser.py
index 05a72ac23bf2..38bd72811625 100644
--- a/vllm/reasoning/gptoss_reasoning_parser.py
+++ b/vllm/reasoning/gptoss_reasoning_parser.py
@@ -51,6 +51,7 @@ def extract_reasoning_content_streaming(
         previous_token_ids: Sequence[int],
         current_token_ids: Sequence[int],
         delta_token_ids: Sequence[int],
+        request: ChatCompletionRequest,
     ) -> Union[DeltaMessage, None]:
         raise RuntimeError(
             "GptOss model uses harmony to extract reasoning content. This "
diff --git a/vllm/reasoning/granite_reasoning_parser.py b/vllm/reasoning/granite_reasoning_parser.py
index 5820001b918f..a1aeed8b8fe9 100644
--- a/vllm/reasoning/granite_reasoning_parser.py
+++ b/vllm/reasoning/granite_reasoning_parser.py
@@ -83,6 +83,7 @@ def extract_reasoning_content_streaming(
         previous_token_ids: Sequence[int],
         current_token_ids: Sequence[int],
         delta_token_ids: Sequence[int],
+        request: ChatCompletionRequest,
     ) -> Union[DeltaMessage, None]:
         """Extract the reasoning content / content emitted by granite models;
         If the sequence doesn't match what we expect, i.e., the model generates
diff --git a/vllm/reasoning/hunyuan_a13b_reasoning_parser.py b/vllm/reasoning/hunyuan_a13b_reasoning_parser.py
index b2452b95c1c6..de4908dffbbe 100644
--- a/vllm/reasoning/hunyuan_a13b_reasoning_parser.py
+++ b/vllm/reasoning/hunyuan_a13b_reasoning_parser.py
@@ -152,6 +152,7 @@ def extract_reasoning_content_streaming(
         previous_token_ids: Sequence[int],
         current_token_ids: Sequence[int],
         delta_token_ids: Sequence[int],
+        request: ChatCompletionRequest,
     ) -> Union[DeltaMessage, None]:
         """Extract content using token ID sequence state machine"""
         # Define sequences
diff --git a/vllm/reasoning/qwen3_reasoning_parser.py b/vllm/reasoning/qwen3_reasoning_parser.py
index 61bafc724c17..7d167639a8db 100644
--- a/vllm/reasoning/qwen3_reasoning_parser.py
+++ b/vllm/reasoning/qwen3_reasoning_parser.py
@@ -64,6 +64,7 @@ def extract_reasoning_content_streaming(
         previous_token_ids: Sequence[int],
         current_token_ids: Sequence[int],
         delta_token_ids: Sequence[int],
+        request: ChatCompletionRequest,
     ) -> Union[DeltaMessage, None]:
         """
         Extract reasoning content from a delta message.