Commit ac197bb
[FEAT] Llama-index LLM updates and reasoning effort parameter for OpenAI 'O' series models (#186)
* Added reasoning effort and support for o3 openai model
* updated azure openai library
* Update src/unstract/sdk/__init__.py
  Co-authored-by: Chandrasekharan M <[email protected]>
  Signed-off-by: Praveen Kumar <[email protected]>
* Added a more meaningful message for the increase of latency and cost for reasoning effort

---------

Signed-off-by: Praveen Kumar <[email protected]>
Co-authored-by: Chandrasekharan M <[email protected]>
1 parent fff3220 commit ac197bb

File tree

5 files changed (+2116, -2043 lines)

pyproject.toml

Lines changed: 5 additions & 5 deletions
@@ -26,7 +26,7 @@ dependencies = [
     "python-magic~=0.4.27",
     "python-dotenv==1.0.0",
     # Adapter changes
-    "llama-index==0.12.8",
+    "llama-index==0.12.37",
     "tiktoken~=0.9.0",
     "transformers==4.37.0",
     "llama-index-embeddings-google==0.3.0",
@@ -48,14 +48,14 @@ dependencies = [
     "llama-index-vector-stores-weaviate==1.3.1",
     "llama-index-vector-stores-pinecone==0.4.2",
     "llama-index-vector-stores-qdrant==0.4.2",
-    "llama-index-llms-openai==0.3.25",
+    "llama-index-llms-openai==0.3.42",
     "llama-index-llms-palm==0.3.0",
     "llama-index-llms-mistralai==0.3.1",
     "mistralai==1.2.5",
     "llama-index-llms-anyscale==0.3.0",
-    "llama-index-llms-anthropic==0.6.10",
-    "llama-index-llms-azure-openai==0.3.1",
-    "llama-index-llms-vertex==0.4.2",
+    "llama-index-llms-anthropic==0.6.14",
+    "llama-index-llms-azure-openai==0.3.2",
+    "llama-index-llms-vertex==0.4.6",
     "llama-index-llms-replicate==0.4.0",
     "llama-index-llms-ollama==0.5.0",
     "llama-index-llms-bedrock==0.3.3",

src/unstract/sdk/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-__version__ = "v0.71.1"
+__version__ = "v0.72.0"
 
 
 def get_sdk_version() -> str:
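The bumped version is what the SDK reports at runtime. A minimal usage sketch, assuming get_sdk_version() simply returns the __version__ string above:

from unstract.sdk import get_sdk_version

# Expected to print "v0.72.0" for this release.
print(get_sdk_version())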

src/unstract/sdk/adapters/llm/open_ai/src/open_ai.py

Lines changed: 9 additions & 1 deletion
@@ -20,6 +20,8 @@ class Constants:
     API_BASE = "api_base"
     API_VERSION = "api_version"
     MAX_TOKENS = "max_tokens"
+    RESONING_EFFORT = "reasoning_effort"
+    ENABLE_REASONING = "enable_reasoning"
 
 
 class OpenAILLM(LLMAdapter):
@@ -54,6 +56,7 @@ def get_llm_instance(self) -> LLM:
             max_tokens = self.config.get(Constants.MAX_TOKENS)
             max_tokens = int(max_tokens) if max_tokens else None
             model = str(self.config.get(Constants.MODEL))
+            enable_reasoning = self.config.get(Constants.ENABLE_REASONING)
 
             llm_kwargs = {
                 "model": model,
@@ -70,10 +73,15 @@ def get_llm_instance(self) -> LLM:
                 "max_tokens": max_tokens,
             }
 
-            # O-series models default to temperature=1, ignoring passed values, so it's not set explicitly.
+            # O-series models default to temperature=1
             if model not in O1_MODELS:
                 llm_kwargs["temperature"] = 0
 
+            if enable_reasoning:
+                llm_kwargs["reasoning_effort"] = self.config.get(
+                    Constants.RESONING_EFFORT
+                )
+
             llm = OpenAI(**llm_kwargs)
             return llm
         except Exception as e:
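For orientation, here is a minimal sketch of how an adapter config with reasoning enabled ends up as kwargs for llama-index's OpenAI client. The config dict, model name, and API key are placeholders, and it assumes the pinned llama-index-llms-openai release accepts reasoning_effort as a keyword argument, as the diff above does:

from llama_index.llms.openai import OpenAI

# Hypothetical adapter config; the keys mirror the Constants class above.
config = {
    "model": "o3-mini",           # placeholder O-series model name
    "api_key": "sk-...",          # placeholder key
    "max_tokens": 4096,
    "enable_reasoning": True,
    "reasoning_effort": "high",   # one of "low" / "medium" / "high"
}

llm_kwargs = {
    "model": config["model"],
    "api_key": config["api_key"],
    "max_tokens": config["max_tokens"],
}

# O-series models default to temperature=1, so temperature is left unset here;
# for other models the adapter sets it to 0 (see the O1_MODELS check above).

if config.get("enable_reasoning"):
    llm_kwargs["reasoning_effort"] = config.get("reasoning_effort")

# Subsequent completion calls on this instance would carry reasoning_effort.
llm = OpenAI(**llm_kwargs)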

src/unstract/sdk/adapters/llm/open_ai/src/static/json_schema.json

Lines changed: 48 additions & 1 deletion
@@ -60,6 +60,53 @@
       "title": "Timeout",
       "default": 900,
       "description": "Timeout in seconds"
+    },
+    "enable_reasoning": {
+      "type": "boolean",
+      "title": "Enable Reasoning",
+      "default": false,
+      "description": "Allow the model to apply extra reasoning for complex tasks. May slightly increase latency and cost, typically within 20–50% depending on the level selected. Only applicable for [O series models](https://platform.openai.com/docs/models#reasoning)."
+    }
+  },
+  "allOf": [
+    {
+      "if": {
+        "properties": {
+          "enable_reasoning": {
+            "const": true
+          }
+        }
+      },
+      "then": {
+        "properties": {
+          "reasoning_effort": {
+            "type": "string",
+            "enum": [
+              "low",
+              "medium",
+              "high"
+            ],
+            "default": "medium",
+            "title": "Reasoning Effort",
+            "description": "Sets the Reasoning Strength when Reasoning Effort is enabled"
+          }
+        },
+        "required": [
+          "reasoning_effort"
+        ]
+      }
+    },
+    {
+      "if": {
+        "properties": {
+          "enable_reasoning": {
+            "const": false
+          }
+        }
+      },
+      "then": {
+        "properties": {}
+      }
+    }
     }
-  }
+  ]
 }
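To illustrate how the conditional allOf block behaves at validation time, here is a small sketch using the third-party jsonschema package (not part of this commit). The schema dict is trimmed to just the reasoning-related pieces of the full schema above:

import jsonschema

# Trimmed-down version of the schema above: only the reasoning-related parts.
schema = {
    "type": "object",
    "properties": {
        "enable_reasoning": {"type": "boolean", "default": False},
    },
    "allOf": [
        {
            "if": {"properties": {"enable_reasoning": {"const": True}}},
            "then": {
                "properties": {
                    "reasoning_effort": {
                        "type": "string",
                        "enum": ["low", "medium", "high"],
                        "default": "medium",
                    }
                },
                "required": ["reasoning_effort"],
            },
        }
    ],
}

# Valid: reasoning enabled and an effort level supplied.
jsonschema.validate({"enable_reasoning": True, "reasoning_effort": "high"}, schema)

# Invalid: reasoning enabled but no effort level -> raises ValidationError.
try:
    jsonschema.validate({"enable_reasoning": True}, schema)
except jsonschema.ValidationError as e:
    print("rejected:", e.message)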
