
Commit bb327e8

[FEAT] Support for thinking/reasoning capabilities for LLMs in Bedrock and AzureOpenAI (#188)
* Updated llama-index packages to support reasoning models
* Added temperature in the Azure JSON and added thinking support for Bedrock Claude models
* Migrated to BedrockConverse
* Updated the version of the SDK
* Added reasoning effort parameter for Azure OpenAI
* Update src/unstract/sdk/adapters/llm/azure_open_ai/src/azure_open_ai.py
* Updated SDK version from 0.72.1 to 0.73.0
* Fixed issue with additional_kwargs
* Updated anthropic llama-index version to support Claude Sonnet 4

Signed-off-by: Praveen Kumar <[email protected]>
Co-authored-by: Ritwik G <[email protected]>
1 parent 45cefdc commit bb327e8

File tree

9 files changed: +210 −93 lines

pyproject.toml

Lines changed: 6 additions & 6 deletions
@@ -26,7 +26,7 @@ dependencies = [
     "python-magic~=0.4.27",
     "python-dotenv==1.0.0",
     # Adapter changes
-    "llama-index==0.12.37",
+    "llama-index==0.12.39",
     "tiktoken~=0.9.0",
     "transformers==4.37.0",
     "llama-index-embeddings-google==0.3.0",
@@ -48,17 +48,17 @@ dependencies = [
     "llama-index-vector-stores-weaviate==1.3.1",
     "llama-index-vector-stores-pinecone==0.4.2",
     "llama-index-vector-stores-qdrant==0.4.2",
-    "llama-index-llms-openai==0.3.42",
+    "llama-index-llms-openai==0.4.1",
     "llama-index-llms-palm==0.3.0",
     "llama-index-llms-mistralai==0.3.1",
     "mistralai==1.2.5",
-    "llama-index-llms-anyscale==0.3.0",
-    "llama-index-llms-anthropic==0.6.14",
-    "llama-index-llms-azure-openai==0.3.2",
+    "llama-index-llms-anyscale==0.3.1",
+    "llama-index-llms-anthropic==0.7.2",
+    "llama-index-llms-azure-openai==0.3.3",
     "llama-index-llms-vertex==0.4.6",
     "llama-index-llms-replicate==0.4.0",
     "llama-index-llms-ollama==0.5.0",
-    "llama-index-llms-bedrock==0.3.3",
+    "llama-index-llms-bedrock-converse==0.7.1",
     # For Llama Parse X2Text
     "llama-parse==0.5.19",
     # OCR

src/unstract/sdk/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-__version__ = "v0.72.0"
+__version__ = "v0.73.0"
 
 
 def get_sdk_version() -> str:

src/unstract/sdk/adapters/llm/azure_open_ai/src/azure_open_ai.py

Lines changed: 29 additions & 14 deletions
@@ -3,6 +3,7 @@
 
 from llama_index.core.llms import LLM
 from llama_index.llms.azure_openai import AzureOpenAI
+from llama_index.llms.openai.utils import O1_MODELS
 
 from unstract.sdk.adapters.exceptions import AdapterError
 from unstract.sdk.adapters.llm.constants import LLMKeys
@@ -20,6 +21,8 @@ class Constants:
     API_TYPE = "azure"
     TIMEOUT = "timeout"
     DEFAULT_MODEL = "gpt-35-turbo"
+    ENABLE_REASONING = "enable_reasoning"
+    REASONING_EFFORT = "reasoning_effort"
 
 
 class AzureOpenAILLM(LLMAdapter):
@@ -55,21 +58,33 @@ def get_llm_instance(self) -> LLM:
         )
         max_tokens = self.config.get(Constants.MAX_TOKENS)
         max_tokens = int(max_tokens) if max_tokens else None
+        enable_reasoning = self.config.get(Constants.ENABLE_REASONING)
+        model = self.config.get(Constants.MODEL, Constants.DEFAULT_MODEL)
+
+        llm_kwargs = {
+            "model": model,
+            "deployment_name": str(self.config.get(Constants.DEPLOYMENT_NAME)),
+            "api_key": str(self.config.get(Constants.API_KEY)),
+            "api_version": str(self.config.get(Constants.API_VERSION)),
+            "azure_endpoint": str(self.config.get(Constants.AZURE_ENDPONT)),
+            "api_type": Constants.API_TYPE,
+            "temperature": 0,
+            "timeout": float(self.config.get(Constants.TIMEOUT, LLMKeys.DEFAULT_TIMEOUT)),
+            "max_retries": max_retries,
+        }
+
+        if enable_reasoning:
+            llm_kwargs["reasoning_effort"] = self.config.get(
+                Constants.REASONING_EFFORT
+            )
+
+        if model not in O1_MODELS:
+            llm_kwargs["max_completion_tokens"] = max_tokens
+        else:
+            llm_kwargs["max_tokens"] = max_tokens
+
         try:
-            llm: LLM = AzureOpenAI(
-                model=self.config.get(Constants.MODEL, Constants.DEFAULT_MODEL),
-                deployment_name=str(self.config.get(Constants.DEPLOYMENT_NAME)),
-                api_key=str(self.config.get(Constants.API_KEY)),
-                api_version=str(self.config.get(Constants.API_VERSION)),
-                azure_endpoint=str(self.config.get(Constants.AZURE_ENDPONT)),
-                api_type=Constants.API_TYPE,
-                temperature=0,
-                timeout=float(
-                    self.config.get(Constants.TIMEOUT, LLMKeys.DEFAULT_TIMEOUT)
-                ),
-                max_retries=max_retries,
-                max_tokens=max_tokens,
-            )
+            llm: LLM = AzureOpenAI(**llm_kwargs)
             return llm
         except Exception as e:
             raise AdapterError(str(e))
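For illustration, a minimal sketch of what get_llm_instance() assembles when enable_reasoning is set. All values below (deployment name, endpoint, key, API version, model) are placeholders, not part of this commit; only the reasoning_effort handling mirrors the diff above.

    from llama_index.llms.azure_openai import AzureOpenAI

    # Placeholder values throughout; sketch of the kwargs the adapter builds.
    llm = AzureOpenAI(
        model="o3-mini",                     # an O-series reasoning model
        deployment_name="my-o3-mini",        # hypothetical deployment name
        api_key="<azure-openai-api-key>",
        api_version="2024-12-01-preview",
        azure_endpoint="https://example.openai.azure.com/",
        api_type="azure",
        temperature=0,
        timeout=900.0,
        max_retries=5,
        reasoning_effort="high",             # forwarded only when enable_reasoning is true
    )

Building a kwargs dict first, as the diff does, keeps the optional reasoning_effort and the max_tokens/max_completion_tokens split out of a single sprawling constructor call.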

src/unstract/sdk/adapters/llm/azure_open_ai/src/static/json_schema.json

Lines changed: 48 additions & 1 deletion
@@ -67,6 +67,53 @@
       "title": "Timeout",
       "default": 900,
       "description": "Timeout in seconds"
+    },
+    "enable_reasoning": {
+      "type": "boolean",
+      "title": "Enable Reasoning",
+      "default": false,
+      "description": "Allow the model to apply extra reasoning for complex tasks. May slightly increase latency and cost, typically within 20–50% depending on the level selected. Only applicable for [O series models](https://platform.openai.com/docs/models#reasoning)."
+    }
+  },
+  "allOf": [
+    {
+      "if": {
+        "properties": {
+          "enable_reasoning": {
+            "const": true
+          }
+        }
+      },
+      "then": {
+        "properties": {
+          "reasoning_effort": {
+            "type": "string",
+            "enum": [
+              "low",
+              "medium",
+              "high"
+            ],
+            "default": "medium",
+            "title": "Reasoning Effort",
+            "description": "Sets the Reasoning Strength when Reasoning Effort is enabled"
+          }
+        },
+        "required": [
+          "reasoning_effort"
+        ]
+      }
+    },
+    {
+      "if": {
+        "properties": {
+          "enable_reasoning": {
+            "const": false
+          }
+        }
+      },
+      "then": {
+        "properties": {}
+      }
     }
-  }
+  ]
 }
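Under this conditional schema, reasoning_effort is required only when enable_reasoning is true. A sketch of the two valid config shapes, written as Python dicts with the connection fields omitted for brevity:

    # Valid: reasoning enabled, so reasoning_effort is required.
    reasoning_on = {"enable_reasoning": True, "reasoning_effort": "medium"}

    # Valid: reasoning disabled (or omitted), so no extra field is needed.
    reasoning_off = {"enable_reasoning": False}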

src/unstract/sdk/adapters/llm/bedrock/src/bedrock.py

Lines changed: 23 additions & 9 deletions
@@ -2,7 +2,7 @@
 from typing import Any
 
 from llama_index.core.llms import LLM
-from llama_index.llms.bedrock import Bedrock
+from llama_index.llms.bedrock_converse import BedrockConverse
 from unstract.sdk.adapters.exceptions import AdapterError
 from unstract.sdk.adapters.llm.constants import LLMKeys
 from unstract.sdk.adapters.llm.llm_adapter import LLMAdapter
@@ -19,6 +19,8 @@ class Constants:
     CONTEXT_SIZE = "context_size"
     MAX_TOKENS = "max_tokens"
     DEFAULT_MAX_TOKENS = 512  # Default at llama-index
+    ENABLE_THINKING = "enable_thinking"
+    BUDGET_TOKENS = "budget_tokens"
 
 
 class BedrockLLM(LLMAdapter):
@@ -49,16 +51,28 @@ def get_icon() -> str:
         return "/icons/adapter-icons/Bedrock.png"
 
     def get_llm_instance(self) -> LLM:
+
+        thinking = self.config.get(Constants.ENABLE_THINKING)
+        thinking_dict = None
+        temperature = 0
+        additional_kwargs = None
+
+        if thinking:
+            additional_kwargs = {
+                "additionalModelRequestFields": {
+                    "thinking": {
+                        "type": "enabled",
+                        "budget_tokens": self.config.get(Constants.BUDGET_TOKENS)
+                    }
+                }
+            }
+            temperature = 1
+
         try:
-            context_size: int | None = (
-                int(self.config.get(Constants.CONTEXT_SIZE, 0))
-                if self.config.get(Constants.CONTEXT_SIZE)
-                else None
-            )
             max_tokens = int(
                 self.config.get(Constants.MAX_TOKENS, Constants.DEFAULT_MAX_TOKENS)
             )
-            llm: LLM = Bedrock(
+            llm: LLM = BedrockConverse(
                 model=self.config.get(Constants.MODEL),
                 aws_access_key_id=self.config.get(Constants.ACCESS_KEY_ID),
                 aws_secret_access_key=self.config.get(Constants.SECRET_ACCESS_KEY),
@@ -69,9 +83,9 @@ def get_llm_instance(self) -> LLM:
                 max_retries=int(
                     self.config.get(Constants.MAX_RETRIES, LLMKeys.DEFAULT_MAX_RETRIES)
                 ),
-                temperature=0,
-                context_size=context_size,
+                temperature=temperature,
                 max_tokens=max_tokens,
+                additional_kwargs=additional_kwargs,
             )
             return llm
         except Exception as e:
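A hedged sketch of the equivalent direct construction when extended thinking is enabled. The model ID and credentials are placeholders; the additionalModelRequestFields payload mirrors the diff, and Anthropic's extended thinking expects max_tokens to exceed budget_tokens.

    from llama_index.llms.bedrock_converse import BedrockConverse

    # Placeholder model ID and credentials; sketch only.
    llm = BedrockConverse(
        model="anthropic.claude-3-7-sonnet-20250219-v1:0",  # hypothetical model ID
        aws_access_key_id="<access-key-id>",
        aws_secret_access_key="<secret-access-key>",
        region_name="us-east-1",
        temperature=1,       # extended thinking requires temperature 1
        max_tokens=4096,     # should exceed budget_tokens below
        additional_kwargs={
            "additionalModelRequestFields": {
                "thinking": {"type": "enabled", "budget_tokens": 1024}
            }
        },
    )

Routing the thinking block through additional_kwargs is what lets the Converse API receive it untouched, which is also why the connectivity test in llm_adapter.py (below) inspects additional_kwargs.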

src/unstract/sdk/adapters/llm/bedrock/src/static/json_schema.json

Lines changed: 44 additions & 8 deletions
@@ -38,13 +38,6 @@
       "title": "AWS Region name",
       "description": "Provide the AWS Region name where the service is running. Eg. us-east-1"
     },
-    "context_size": {
-      "type": "number",
-      "minimum": 0,
-      "multipleOf": 1,
-      "title": "Context Size",
-      "description": "The maximum number of context (input) tokens for the model. For setting default in supported models, leave this empty."
-    },
     "max_tokens": {
       "type": "number",
       "minimum": 0,
@@ -68,6 +61,49 @@
       "title": "Timeout",
       "default": 900,
       "description": "Timeout in seconds"
+    },
+    "enable_thinking": {
+      "type": "boolean",
+      "title": "Enable Extended Thinking",
+      "default": false,
+      "description": "Enhance reasoning for complex tasks with step-by-step transparency. Available only for Claude 3.7 Sonnet."
+    }
+  },
+  "allOf": [
+    {
+      "if": {
+        "properties": {
+          "enable_thinking": {
+            "const": true
+          }
+        }
+      },
+      "then": {
+        "properties": {
+          "budget_tokens": {
+            "type": "number",
+            "minimum": 1024,
+            "default": 1024,
+            "title": "Thinking Budget Tokens",
+            "description": "Sets the max tokens for Claude's internal reasoning when thinking is enabled"
+          }
+        },
+        "required": [
+          "budget_tokens"
+        ]
+      }
+    },
+    {
+      "if": {
+        "properties": {
+          "enable_thinking": {
+            "const": false
+          }
+        }
+      },
+      "then": {
+        "properties": {}
+      }
     }
-  }
+  ]
 }
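This follows the same conditional pattern as the Azure schema: budget_tokens appears, and is required (minimum 1024), only when enable_thinking is true. A sketch of the two config shapes, connection fields omitted:

    # Valid: thinking enabled, so budget_tokens (>= 1024) is required.
    thinking_on = {"enable_thinking": True, "budget_tokens": 2048}

    # Valid: thinking disabled; budget_tokens is not needed.
    thinking_off = {"enable_thinking": False}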

src/unstract/sdk/adapters/llm/llm_adapter.py

Lines changed: 6 additions & 1 deletion
@@ -76,9 +76,14 @@ def _test_llm_instance(llm: LLM | None) -> bool:
         if hasattr(llm, "model") and llm.model not in O1_MODELS:
             completion_kwargs["temperature"] = 0.003
 
-        if hasattr(llm, "thinking_dict") and llm.thinking_dict is not None:
+        if (
+            hasattr(llm, "thinking_dict") and llm.thinking_dict is not None
+        ) or (
+            hasattr(llm, "additional_kwargs") and llm.additional_kwargs is not None
+        ):
             completion_kwargs["temperature"] = 1
 
+
         response = llm.complete("The capital of Tamilnadu is ", **completion_kwargs)
         response_lower_case: str = response.text.lower()
         find_match = re.search("chennai", response_lower_case)
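The widened check exists because thinking/reasoning-enabled models reject the near-zero test temperature; Anthropic's extended thinking in particular only accepts temperature 1. A minimal, self-contained sketch of the same gating logic; the FakeLLM class and pick_test_temperature helper are hypothetical, for illustration only.

    class FakeLLM:
        """Hypothetical stand-in exposing the attributes the test inspects."""

        def __init__(self, thinking_dict=None, additional_kwargs=None):
            self.thinking_dict = thinking_dict
            self.additional_kwargs = additional_kwargs


    def pick_test_temperature(llm) -> float:
        # Mirrors _test_llm_instance: force temperature 1 for thinking/reasoning
        # configurations, otherwise use a near-zero temperature for determinism.
        if (
            getattr(llm, "thinking_dict", None) is not None
            or getattr(llm, "additional_kwargs", None) is not None
        ):
            return 1
        return 0.003


    assert pick_test_temperature(FakeLLM()) == 0.003
    assert pick_test_temperature(
        FakeLLM(additional_kwargs={"additionalModelRequestFields": {}})
    ) == 1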

src/unstract/sdk/adapters/llm/open_ai/src/open_ai.py

Lines changed: 2 additions & 2 deletions
@@ -20,7 +20,7 @@ class Constants:
     API_BASE = "api_base"
     API_VERSION = "api_version"
     MAX_TOKENS = "max_tokens"
-    RESONING_EFFORT = "reasoning_effort"
+    REASONING_EFFORT = "reasoning_effort"
     ENABLE_REASONING = "enable_reasoning"
 
 
@@ -79,7 +79,7 @@ def get_llm_instance(self) -> LLM:
 
         if enable_reasoning:
             llm_kwargs["reasoning_effort"] = self.config.get(
-                Constants.RESONING_EFFORT
+                Constants.REASONING_EFFORT
             )
 
         llm = OpenAI(**llm_kwargs)
