diff --git a/functions/pipes/anthropic/main.py b/functions/pipes/anthropic/main.py
index 44b8e8e..e2edfdd 100644
--- a/functions/pipes/anthropic/main.py
+++ b/functions/pipes/anthropic/main.py
@@ -1,11 +1,12 @@
 """
-title: Anthropic Manifold Pipe
-authors: justinh-rahb, christian-taillon, jfbloom22
-author_url: https://github.com/justinh-rahb
+title: Anthropic Manifold Pipe with Extended Thinking and Cache Control
+authors: justinh-rahb, christian-taillon, jfbloom22, Mark Kazakov, Vincent, NIK-NUB, cache control added by Snav
+author_url: https://github.com/jfbloom22
 funding_url: https://github.com/open-webui
-version: 0.3.0
+version: 0.5.0
 required_open_webui_version: 0.3.17
 license: MIT
+description: An advanced manifold pipe for interacting with Anthropic's Claude models, featuring extended thinking support, cache control, beta features, and sophisticated model handling for Claude 4.5.
 """

 import os

@@ -18,18 +19,46 @@

 class Pipe:
+    CACHE_TTL = "1h"
+
     class Valves(BaseModel):
-        ANTHROPIC_API_KEY: str = Field(default="")
+        ANTHROPIC_API_KEY: str = Field(default="", description="Anthropic API Key")
+        CLAUDE_USE_TEMPERATURE: bool = Field(
+            default=True,
+            description="For Claude 4.x models (Opus 4, Sonnet 4.5, Haiku 3.5+): use temperature (True) or top_p (False). Claude 4.x models only support one of the two parameters.",
+        )
+        BETA_FEATURES: str = Field(
+            default="",
+            description="Enable Anthropic beta features, e.g.: context-management-2025-06-27",
+        )
+        ENABLE_THINKING: bool = Field(
+            default=True,
+            description="Enable Claude's extended thinking capabilities (Claude 4.5 Sonnet thinking model only)",
+        )
+        THINKING_BUDGET: int = Field(
+            default=16000,
+            description="Maximum number of tokens Claude can use for thinking (min: 1024, max: 32000)",
+        )
+        DISPLAY_THINKING: bool = Field(
+            default=True, description="Display Claude's thinking process in the chat"
+        )

     def __init__(self):
         self.type = "manifold"
         self.id = "anthropic"
         self.name = "anthropic/"
         self.valves = self.Valves(
-            **{"ANTHROPIC_API_KEY": os.getenv("ANTHROPIC_API_KEY", "")}
+            **{
+                "ANTHROPIC_API_KEY": os.getenv("ANTHROPIC_API_KEY", ""),
+                "CLAUDE_USE_TEMPERATURE": True,  # Use temperature for Claude 4.x models
+                "BETA_FEATURES": "",
+                "ENABLE_THINKING": True,
+                "THINKING_BUDGET": 16000,
+                "DISPLAY_THINKING": True,
+            }
         )
         self.MAX_IMAGE_SIZE = 5 * 1024 * 1024  # 5MB per image
-
+        # Model cache
         self._model_cache: Optional[List[Dict[str, str]]] = None
         self._model_cache_time: float = 0
@@ -106,6 +135,105 @@ def get_anthropic_models(self) -> List[Dict[str, str]]:
         """
         return self.get_anthropic_models_from_api()

+    def _attach_cache_control(self, block: dict):
+        """Attach cache control to a content block."""
+        if not isinstance(block, dict):
+            return block
+
+        # Skip block types that cannot be cached directly per Anthropic docs
+        if block.get("type") in {"thinking", "redacted_thinking"}:
+            return block
+
+        if not block.get("type"):
+            block["type"] = "text"
+            if "text" not in block:
+                block["text"] = ""
+
+        cache_control = dict(block.get("cache_control", {}))
+        cache_control["type"] = "ephemeral"
+        cache_control["ttl"] = self.CACHE_TTL
+        block["cache_control"] = cache_control
+        return block
+
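Note (not part of the patch): for reference, a system block run through _attach_cache_control comes out shaped as in the sketch below. An assumption worth flagging: per Anthropic's prompt-caching docs, the "1h" TTL only takes effect when the extended cache TTL beta (extended-cache-ttl-2025-04-11, if I recall the id correctly) is listed in BETA_FEATURES; otherwise the default 5-minute ephemeral cache applies.

    block = {"type": "text", "text": "You are a helpful assistant."}
    Pipe()._attach_cache_control(block)
    # block is now:
    # {
    #     "type": "text",
    #     "text": "You are a helpful assistant.",
    #     "cache_control": {"type": "ephemeral", "ttl": "1h"},
    # }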
"content" in item: + # Handle message-style dicts that still wrap content + blocks.extend(self._normalize_content_blocks(item["content"])) + elif item is not None: + blocks.append({"type": "text", "text": str(item)}) + + return blocks + + def _prepare_system_blocks(self, system_message): + """Prepare system message with cache control.""" + if not system_message: + return None + + # Open WebUI may hand us a raw message dict, list of blocks, or plain text + content = ( + system_message.get("content") + if isinstance(system_message, dict) and "content" in system_message + else system_message + ) + + normalized_blocks = self._normalize_content_blocks(content) + cached_blocks = [ + self._attach_cache_control(block) for block in normalized_blocks + ] + + return cached_blocks if cached_blocks else None + + def _apply_cache_control_to_last_message(self, messages): + """Apply cache control to the last user message.""" + if not messages: + return + + last_message = messages[-1] + if last_message.get("role") != "user": + return + + for block in reversed(last_message.get("content", [])): + if isinstance(block, dict) and block.get("type") not in { + "thinking", + "redacted_thinking", + }: + self._attach_cache_control(block) + break + + def _is_claude_4x_model(self, model_name: str) -> bool: + """ + Determine if a model is a Claude 4.x generation model that has temperature/top_p constraints. + Uses a more future-proof approach than simple prefix matching. + + Args: + model_name: The model name to check + + Returns: + True if this is a Claude 4.x model with constraints + """ + import re + + # Pattern to match Claude 4.x models with various version suffixes + # Examples: claude-opus-4, claude-opus-4-1-20250805, claude-sonnet-4-5, claude-sonnet-4-5-20250929, claude-3-5-haiku-latest + # The pattern allows for optional sub-versions (like -1, -5) and dates + pattern = r"^claude-(opus|sonnet|haiku)-4(?:-\d+)?(?:-\d{8})?$" + + # Also match the latest haiku variants (claude-3-5-haiku-latest is actually Claude 4.x generation) + haiku_pattern = r"^claude-3-5-haiku" + + return bool(re.match(pattern, model_name)) or bool(re.match(haiku_pattern, model_name)) + def pipes(self) -> List[dict]: return self.get_anthropic_models() @@ -150,7 +278,7 @@ def pipe(self, body: dict) -> Union[str, Generator, Iterator]: system_message, messages = pop_system_message(body["messages"]) processed_messages = [] - total_image_size = 0 + total_image_size = 0.0 for message in messages: processed_content = [] @@ -172,6 +300,20 @@ def pipe(self, body: dict) -> Union[str, Generator, Iterator]: raise ValueError( "Total size of images exceeds 100 MB limit" ) + elif item["type"] == "thinking" and "signature" in item: + # Include thinking blocks if present in the message + processed_content.append( + { + "type": "thinking", + "thinking": item["thinking"], + "signature": item["signature"], + } + ) + elif item["type"] == "redacted_thinking" and "data" in item: + # Include redacted thinking blocks if present + processed_content.append( + {"type": "redacted_thinking", "data": item["data"]} + ) else: processed_content = [ {"type": "text", "text": message.get("content", "")} @@ -181,23 +323,71 @@ def pipe(self, body: dict) -> Union[str, Generator, Iterator]: {"role": message["role"], "content": processed_content} ) + system_blocks = self._prepare_system_blocks(system_message) + self._apply_cache_control_to_last_message(processed_messages) + + model_name = body["model"][body["model"].find(".") + 1 :] + + # Check if this is a thinking model + 
     def pipes(self) -> List[dict]:
         return self.get_anthropic_models()

@@ -150,7 +278,7 @@ def pipe(self, body: dict) -> Union[str, Generator, Iterator]:
         system_message, messages = pop_system_message(body["messages"])

         processed_messages = []
-        total_image_size = 0
+        total_image_size = 0.0

         for message in messages:
             processed_content = []
@@ -172,6 +300,20 @@ def pipe(self, body: dict) -> Union[str, Generator, Iterator]:
                         raise ValueError(
                             "Total size of images exceeds 100 MB limit"
                         )
+                    elif item["type"] == "thinking" and "signature" in item:
+                        # Include thinking blocks if present in the message
+                        processed_content.append(
+                            {
+                                "type": "thinking",
+                                "thinking": item["thinking"],
+                                "signature": item["signature"],
+                            }
+                        )
+                    elif item["type"] == "redacted_thinking" and "data" in item:
+                        # Include redacted thinking blocks if present
+                        processed_content.append(
+                            {"type": "redacted_thinking", "data": item["data"]}
+                        )
             else:
                 processed_content = [
                     {"type": "text", "text": message.get("content", "")}
@@ -181,23 +323,71 @@ def pipe(self, body: dict) -> Union[str, Generator, Iterator]:
                 {"role": message["role"], "content": processed_content}
             )

+        system_blocks = self._prepare_system_blocks(system_message)
+        self._apply_cache_control_to_last_message(processed_messages)
+
+        model_name = body["model"][body["model"].find(".") + 1 :]
+
+        # Check whether this is a thinking model
+        is_thinking_model = model_name.endswith("-think")
+
+        # Remove the "-think" suffix for the API call if present
+        api_model_name = (
+            model_name.replace("-think", "") if is_thinking_model else model_name
+        )
+
+        # Determine whether thinking will be enabled
+        will_enable_thinking = (
+            self.valves.ENABLE_THINKING
+            and is_thinking_model
+            and "claude-sonnet-4-5" in model_name
+        )
+
         payload = {
-            "model": body["model"][body["model"].find(".") + 1 :],
+            "model": api_model_name,
             "messages": processed_messages,
             "max_tokens": body.get("max_tokens", 4096),
-            "temperature": body.get("temperature", 0.8),
-            "top_k": body.get("top_k", 40),
-            "top_p": body.get("top_p", 0.9),
             "stop_sequences": body.get("stop", []),
-            **({"system": str(system_message)} if system_message else {}),
             "stream": body.get("stream", False),
         }

+        if system_blocks:
+            payload["system"] = system_blocks
+
+        # Only add top_k if thinking is NOT enabled
+        if not will_enable_thinking:
+            payload["top_k"] = body.get("top_k", 40)
+
+        # Add extended thinking for Claude 4.5 Sonnet with thinking
+        if will_enable_thinking:
+            # Clamp the thinking budget to reasonable limits (1024-32000 tokens)
+            thinking_budget = max(1024, min(32000, self.valves.THINKING_BUDGET))
+            payload["thinking"] = {"type": "enabled", "budget_tokens": thinking_budget}
+
+        # Handle temperature/top_p settings based on the model generation.
+        # Claude 4.x models only support either temperature OR top_p, not both.
+        is_claude_4x_model = self._is_claude_4x_model(api_model_name)
+
+        if is_claude_4x_model:
+            if is_thinking_model:
+                # For thinking models, always use temperature = 1.0
+                payload["temperature"] = 1.0
+            elif self.valves.CLAUDE_USE_TEMPERATURE:
+                payload["temperature"] = body.get("temperature", 0.8)
+            else:
+                payload["top_p"] = body.get("top_p", 0.9)
+        else:
+            # Other Claude models support both temperature and top_p
+            payload["temperature"] = body.get("temperature", 0.8)
+            payload["top_p"] = body.get("top_p", 0.9)
+
         headers = {
             "x-api-key": self.valves.ANTHROPIC_API_KEY,
             "anthropic-version": "2023-06-01",
             "content-type": "application/json",
         }
+        if self.valves.BETA_FEATURES:
+            headers["anthropic-beta"] = self.valves.BETA_FEATURES

         url = "https://api.anthropic.com/v1/messages"
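Note (not part of the patch): put together, a request for a hypothetical model id like "anthropic.claude-sonnet-4-5-think" would yield a payload along these lines, a sketch with illustrative values; processed_messages and system_blocks are as built above:

    payload = {
        "model": "claude-sonnet-4-5",  # "-think" suffix stripped before the API call
        "messages": processed_messages,  # last user block carries cache_control
        "max_tokens": 4096,
        "stop_sequences": [],
        "stream": True,
        "system": system_blocks,  # cached system blocks, if any
        "thinking": {"type": "enabled", "budget_tokens": 16000},
        "temperature": 1.0,  # forced for thinking models; top_k and top_p omitted
    }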
@@ -214,14 +404,26 @@ def pipe(self, body: dict) -> Union[str, Generator, Iterator]:
             return f"Error: {e}"

     def stream_response(self, url, headers, payload):
+        """Handle streaming responses, wrapping thinking output in Open WebUI <think> tags."""
         try:
             with requests.post(
                 url, headers=headers, json=payload, stream=True, timeout=(3.05, 60)
             ) as response:
                 if response.status_code != 200:
-                    raise Exception(
-                        f"HTTP Error {response.status_code}: {response.text}"
-                    )
+                    error_text = response.text
+                    try:
+                        error_json = response.json()
+                        if "error" in error_json:
+                            error_text = error_json["error"].get("message", error_text)
+                    except Exception:
+                        pass
+                    raise Exception(f"HTTP Error {response.status_code}: {error_text}")
+
+                thinking_content = ""
+                is_thinking_block = False
+                is_text_block = False
+                has_yielded_thinking = False
+                has_yielded_think_tag = False

                 for line in response.iter_lines():
                     if line:
@@ -229,13 +431,124 @@ def stream_response(self, url, headers, payload):
                         if line.startswith("data: "):
                             try:
                                 data = json.loads(line[6:])
+
+                                # Handle content block starts
                                 if data["type"] == "content_block_start":
-                                    yield data["content_block"]["text"]
+                                    block_type = data["content_block"].get("type", "")
+
+                                    # Handle a thinking block start
+                                    if block_type == "thinking":
+                                        is_thinking_block = True
+                                        # Emit the opening <think> tag immediately
+                                        if (
+                                            not has_yielded_think_tag
+                                            and self.valves.DISPLAY_THINKING
+                                        ):
+                                            yield "<think>"
+                                            has_yielded_think_tag = True
+
+                                    # Handle the transition to a text block
+                                    elif block_type == "text":
+                                        # If we were in a thinking block, close it before starting text
+                                        if is_thinking_block and has_yielded_think_tag:
+                                            yield "</think>"
+                                            has_yielded_thinking = True
+
+                                        is_thinking_block = False
+                                        is_text_block = True
+
+                                        # For text blocks, yield the initial text if any
+                                        if (
+                                            "text" in data["content_block"]
+                                            and data["content_block"]["text"]
+                                        ):
+                                            yield data["content_block"]["text"]
+
+                                    # Handle a redacted thinking block
+                                    elif (
+                                        block_type == "redacted_thinking"
+                                        and self.valves.DISPLAY_THINKING
+                                    ):
+                                        if not has_yielded_think_tag:
+                                            yield "<think>"
+                                            has_yielded_think_tag = True
+                                        yield "[Redacted thinking content]"
+
+                                # Handle block deltas
                                 elif data["type"] == "content_block_delta":
-                                    yield data["delta"]["text"]
+                                    delta = data["delta"]
+
+                                    # Stream thinking deltas inside the <think> tag
+                                    if (
+                                        delta["type"] == "thinking_delta"
+                                        and is_thinking_block
+                                        and self.valves.DISPLAY_THINKING
+                                    ):
+                                        thinking_content += delta["thinking"]
+                                        yield delta["thinking"]
+
+                                    # Stream text deltas normally
+                                    elif (
+                                        delta["type"] == "text_delta" and is_text_block
+                                    ):
+                                        yield delta["text"]
+
+                                # Handle block stops
+                                elif data["type"] == "content_block_stop":
+                                    if is_thinking_block:
+                                        is_thinking_block = False
+                                        # Close the <think> tag at the end of the thinking block
+                                        if (
+                                            has_yielded_think_tag
+                                            and not has_yielded_thinking
+                                        ):
+                                            yield "</think>"
+                                            has_yielded_thinking = True
+                                    elif is_text_block:
+                                        is_text_block = False

+                                # Handle message stop
                                 elif data["type"] == "message_stop":
+                                    # Make sure the <think> tag is closed if needed
+                                    if (
+                                        has_yielded_think_tag
+                                        and not has_yielded_thinking
+                                    ):
+                                        yield "</think>"
                                     break
+
+                                # Handle a single message (non-streaming style response in a stream)
                                 elif data["type"] == "message":
+                                    has_thinking = False
+
+                                    # First check whether there is thinking content
+                                    for content in data.get("content", []):
+                                        if (
+                                            content["type"] == "thinking"
+                                            or content["type"] == "redacted_thinking"
+                                        ) and self.valves.DISPLAY_THINKING:
+                                            has_thinking = True
+                                            break
+
+                                    # If there is thinking, handle it first
+                                    if has_thinking:
+                                        yield "<think>"
+
+                                        for content in data.get("content", []):
+                                            if (
+                                                content["type"] == "thinking"
+                                                and self.valves.DISPLAY_THINKING
+                                            ):
+                                                yield content["thinking"]
+                                            elif (
+                                                content["type"] == "redacted_thinking"
+                                                and self.valves.DISPLAY_THINKING
+                                            ):
+                                                yield "[Redacted thinking content]"
+
+                                        yield "</think>"
+
+                                    # Then yield all text blocks
                                     for content in data.get("content", []):
                                         if content["type"] == "text":
                                             yield content["text"]
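Note (not part of the patch): with DISPLAY_THINKING enabled, the generator above maps SSE events onto Open WebUI's <think> convention roughly as follows, assuming one thinking block followed by one text block (illustrative trace):

    # content_block_start (thinking)       -> yields "<think>"
    # content_block_delta (thinking_delta) -> yields "First, compare the two options..."
    # content_block_stop                   -> yields "</think>"
    # content_block_start (text)           -> yields the initial text, if any
    # content_block_delta (text_delta)     -> yields "The better choice is..."
    # message_stop                         -> yields "</think>" only if still open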
@@ -257,17 +570,51 @@ def stream_response(self, url, headers, payload):
             yield f"Error: {e}"

     def non_stream_response(self, url, headers, payload):
+        """Handle non-streaming responses from the Anthropic API, including thinking blocks."""
         try:
             response = requests.post(
                 url, headers=headers, json=payload, timeout=(3.05, 60)
             )
             if response.status_code != 200:
-                raise Exception(f"HTTP Error {response.status_code}: {response.text}")
+                error_text = response.text
+                try:
+                    error_json = response.json()
+                    if "error" in error_json:
+                        error_text = error_json["error"].get("message", error_text)
+                except Exception:
+                    pass
+                raise Exception(f"HTTP Error {response.status_code}: {error_text}")

             res = response.json()
-            return (
-                res["content"][0]["text"] if "content" in res and res["content"] else ""
-            )
+
+            if "content" not in res or not res["content"]:
+                return ""
+
+            has_thinking = False
+            thinking_content = ""
+            text_content = ""
+
+            # First organize the content by type
+            for content_block in res["content"]:
+                if content_block["type"] == "thinking" and self.valves.DISPLAY_THINKING:
+                    has_thinking = True
+                    thinking_content += content_block["thinking"]
+                elif (
+                    content_block["type"] == "redacted_thinking"
+                    and self.valves.DISPLAY_THINKING
+                ):
+                    has_thinking = True
+                    thinking_content += "[Redacted thinking content]"
+                elif content_block["type"] == "text":
+                    text_content += content_block["text"]
+
+            # Then construct the response with the <think> tags
+            result = ""
+            if has_thinking:
+                result += f"<think>{thinking_content}</think>"
+
+            result += text_content
+            return result
         except requests.exceptions.RequestException as e:
             print(f"Failed non-stream request: {e}")
             return f"Error: {e}"
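Note (not part of the patch): for the non-streaming path, a response carrying one thinking block and one text block is flattened into a single tagged string, e.g. with illustrative values:

    res = {
        "content": [
            {"type": "thinking", "thinking": "Check the capital...", "signature": "..."},
            {"type": "text", "text": "Paris."},
        ]
    }
    # non_stream_response(...) returns:
    # "<think>Check the capital...</think>Paris."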