Skip to content

Commit edbdb0c

Browse files
authored
Feat: Support for Claude 3.7 sonnet LLM (#175)
* Update anthropic llama-index version to 0.6.10 for Claude 3.7 support
* Updated anthropic.py to handle thinking capabilities for Claude 3.7
* Modified JSON schema
* Fixed issue with temperature for model
* Changed budget tokens field to thinking budget tokens

Signed-off-by: Praveen Kumar <[email protected]>

* Updated version to 0.62.0 in __init__.py

Signed-off-by: Praveen Kumar <[email protected]>

---------

Signed-off-by: Praveen Kumar <[email protected]>
1 parent fc0fe4f commit edbdb0c

File tree

6 files changed

+1460
-542
lines changed

6 files changed

+1460
-542
lines changed

pdm.lock

Lines changed: 1395 additions & 535 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ dependencies = [
3939
"llama-index-llms-mistralai==0.3.1",
4040
"mistralai==1.2.5",
4141
"llama-index-llms-anyscale==0.3.0",
42-
"llama-index-llms-anthropic==0.6.3",
42+
"llama-index-llms-anthropic==0.6.10",
4343
"llama-index-llms-azure-openai==0.3.1",
4444
"llama-index-llms-vertex==0.4.2",
4545
"llama-index-llms-replicate==0.4.0",
@@ -127,4 +127,4 @@ path = "src/unstract/sdk/__init__.py"
127127
[tool.pdm.resolution.overrides]
128128
grpcio = "1.62.3"
129129
grpcio-tools = "1.62.3"
130-
grpcio-health-checking = "1.62.3"
130+
grpcio-health-checking = "1.62.3"

src/unstract/sdk/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
__version__ = "0.61.0"
1+
__version__ = "0.62.0"
22

33

44
def get_sdk_version():

src/unstract/sdk/adapters/llm/anthropic/src/anthropic.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@ class Constants:
2020
TIMEOUT = "timeout"
2121
MAX_RETRIES = "max_retries"
2222
MAX_TOKENS = "max_tokens"
23-
23+
ENABLE_THINKING = "enable_thinking"
24+
BUDGET_TOKENS = "budget_tokens"
2425

2526
class AnthropicLLM(LLMAdapter):
2627
def __init__(self, settings: dict[str, Any]):
@@ -53,6 +54,16 @@ def get_llm_instance(self) -> LLM:
5354
max_tokens = int(
5455
self.config.get(Constants.MAX_TOKENS, DEFAULT_ANTHROPIC_MAX_TOKENS)
5556
)
57+
58+
thinking = self.config.get(Constants.ENABLE_THINKING)
59+
thinking_dict = None
60+
temperature = 0
61+
62+
if thinking:
63+
budget_tokens = self.config.get(Constants.BUDGET_TOKENS)
64+
thinking_dict = {"type": "enabled", "budget_tokens": budget_tokens}
65+
temperature = 1
66+
5667
try:
5768
llm: LLM = Anthropic(
5869
model=str(self.config.get(Constants.MODEL)),
@@ -63,8 +74,9 @@ def get_llm_instance(self) -> LLM:
6374
max_retries=int(
6475
self.config.get(Constants.MAX_RETRIES, LLMKeys.DEFAULT_MAX_RETRIES)
6576
),
66-
temperature=0,
77+
temperature=temperature,
6778
max_tokens=max_tokens,
79+
thinking_dict=thinking_dict
6880
)
6981
return llm
7082
except Exception as e:

src/unstract/sdk/adapters/llm/anthropic/src/static/json_schema.json

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,49 @@
5050
"title": "Timeout",
5151
"default": 900,
5252
"description": "Timeout in seconds"
53+
},
54+
"enable_thinking": {
55+
"type": "boolean",
56+
"title": "Enable Extended Thinking",
57+
"default": false,
58+
"description": "Enhance reasoning for complex tasks with step-by-step transparency. Available only for Claude 3.7 Sonnet."
59+
}
60+
},
61+
"allOf": [
62+
{
63+
"if": {
64+
"properties": {
65+
"enable_thinking": {
66+
"const": true
67+
}
68+
}
69+
},
70+
"then": {
71+
"properties": {
72+
"budget_tokens": {
73+
"type": "number",
74+
"minimum": 1024,
75+
"default": 1024,
76+
"title": "Thinking Budget Tokens",
77+
"description": "Sets the max tokens for Claude's internal reasoning when thinking is enabled"
78+
}
79+
},
80+
"required": [
81+
"budget_tokens"
82+
]
83+
}
84+
},
85+
{
86+
"if": {
87+
"properties": {
88+
"enable_thinking": {
89+
"const": false
90+
}
91+
}
92+
},
93+
"then": {
94+
"properties": {}
95+
}
5396
}
54-
}
97+
]
5598
}

src/unstract/sdk/adapters/llm/llm_adapter.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,10 @@ def _test_llm_instance(llm: Optional[LLM]) -> bool:
7777
completion_kwargs = {}
7878
if hasattr(llm, 'model') and getattr(llm, 'model') not in O1_MODELS:
7979
completion_kwargs['temperature'] = 0.003
80-
80+
81+
if hasattr(llm, 'thinking_dict') and getattr(llm, 'thinking_dict') is not None:
82+
completion_kwargs['temperature'] = 1
83+
8184
response = llm.complete(
8285
"The capital of Tamilnadu is ",
8386
**completion_kwargs

0 commit comments

Comments (0)