Skip to content

Commit d5a839d

Browse files
Merge pull request #14700 from BerriAI/litellm_contributor_prs_09_18_2025_p2
Update Bedrock documentation for Titan V2 encoding_format support + Anthropic - account for 1h vs. 5m cache creation token cost difference + UI - add langsmith_sampling_rate as a dynamic param
2 parents b6d5a65 + 92e841e commit d5a839d

File tree

18 files changed

+797
-290
lines changed

18 files changed

+797
-290
lines changed

docs/my-website/docs/providers/bedrock.md

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1822,6 +1822,59 @@ Here's an example of using a bedrock model with LiteLLM. For a complete list, re
18221822
| Mixtral 8x7B Instruct | `completion(model='bedrock/mistral.mixtral-8x7b-instruct-v0:1', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']`, `os.environ['AWS_REGION_NAME']` |
18231823

18241824

1825+
## Bedrock Embedding
1826+
1827+
### API keys
1828+
This can be set as env variables or passed as **params to litellm.embedding()**
1829+
```python
1830+
import os
1831+
os.environ["AWS_ACCESS_KEY_ID"] = "" # Access key
1832+
os.environ["AWS_SECRET_ACCESS_KEY"] = "" # Secret access key
1833+
os.environ["AWS_REGION_NAME"] = "" # us-east-1, us-east-2, us-west-1, us-west-2
1834+
```
1835+
1836+
### Usage
1837+
```python
1838+
from litellm import embedding
1839+
response = embedding(
1840+
model="bedrock/amazon.titan-embed-text-v1",
1841+
input=["good morning from litellm"],
1842+
)
1843+
print(response)
1844+
```
1845+
1846+
#### Titan V2 - encoding_format support
1847+
```python
1848+
from litellm import embedding
1849+
# Float format (default)
1850+
response = embedding(
1851+
model="bedrock/amazon.titan-embed-text-v2:0",
1852+
input=["good morning from litellm"],
1853+
encoding_format="float" # Returns float array
1854+
)
1855+
1856+
# Binary format
1857+
response = embedding(
1858+
model="bedrock/amazon.titan-embed-text-v2:0",
1859+
input=["good morning from litellm"],
1860+
encoding_format="base64" # Returns base64 encoded binary
1861+
)
1862+
```
1863+
1864+
## Supported AWS Bedrock Embedding Models
1865+
1866+
| Model Name | Usage | Supported Additional OpenAI params |
1867+
|----------------------|---------------------------------------------|-----|
1868+
| Titan Embeddings V2 | `embedding(model="bedrock/amazon.titan-embed-text-v2:0", input=input)` | `dimensions`, `encoding_format` |
1869+
| Titan Embeddings - V1 | `embedding(model="bedrock/amazon.titan-embed-text-v1", input=input)` | [here](https://github.com/BerriAI/litellm/blob/f5905e100068e7a4d61441d7453d7cf5609c2121/litellm/llms/bedrock/embed/amazon_titan_g1_transformation.py#L53) |
1870+
| Titan Multimodal Embeddings | `embedding(model="bedrock/amazon.titan-embed-image-v1", input=input)` | [here](https://github.com/BerriAI/litellm/blob/f5905e100068e7a4d61441d7453d7cf5609c2121/litellm/llms/bedrock/embed/amazon_titan_multimodal_transformation.py#L28) |
1871+
| Cohere Embeddings - English | `embedding(model="bedrock/cohere.embed-english-v3", input=input)` | [here](https://github.com/BerriAI/litellm/blob/f5905e100068e7a4d61441d7453d7cf5609c2121/litellm/llms/bedrock/embed/cohere_transformation.py#L18) |
1872+
| Cohere Embeddings - Multilingual | `embedding(model="bedrock/cohere.embed-multilingual-v3", input=input)` | [here](https://github.com/BerriAI/litellm/blob/f5905e100068e7a4d61441d7453d7cf5609c2121/litellm/llms/bedrock/embed/cohere_transformation.py#L18) |
1873+
1874+
### Advanced - [Drop Unsupported Params](https://docs.litellm.ai/docs/completion/drop_params#openai-proxy-usage)
1875+
1876+
### Advanced - [Pass model/provider-specific Params](https://docs.litellm.ai/docs/completion/provider_specific_params#proxy-usage)
1877+
18251878
## Image Generation
18261879
Use this for stable diffusion, and amazon nova canvas on bedrock
18271880

litellm/integrations/langsmith.py

Lines changed: 34 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ def __init__(
3939
langsmith_api_key: Optional[str] = None,
4040
langsmith_project: Optional[str] = None,
4141
langsmith_base_url: Optional[str] = None,
42+
langsmith_sampling_rate: Optional[float] = None,
4243
**kwargs,
4344
):
4445
self.flush_lock = asyncio.Lock()
@@ -49,7 +50,8 @@ def __init__(
4950
langsmith_base_url=langsmith_base_url,
5051
)
5152
self.sampling_rate: float = (
52-
float(os.getenv("LANGSMITH_SAMPLING_RATE")) # type: ignore
53+
langsmith_sampling_rate
54+
or float(os.getenv("LANGSMITH_SAMPLING_RATE")) # type: ignore
5355
if os.getenv("LANGSMITH_SAMPLING_RATE") is not None
5456
and os.getenv("LANGSMITH_SAMPLING_RATE").strip().isdigit() # type: ignore
5557
else 1.0
@@ -76,26 +78,14 @@ def get_credentials_from_env(
7678
langsmith_base_url: Optional[str] = None,
7779
) -> LangsmithCredentialsObject:
7880
_credentials_api_key = langsmith_api_key or os.getenv("LANGSMITH_API_KEY")
79-
if _credentials_api_key is None:
80-
raise Exception(
81-
"Invalid Langsmith API Key given. _credentials_api_key=None."
82-
)
8381
_credentials_project = (
8482
langsmith_project or os.getenv("LANGSMITH_PROJECT") or "litellm-completion"
8583
)
86-
if _credentials_project is None:
87-
raise Exception(
88-
"Invalid Langsmith API Key given. _credentials_project=None."
89-
)
9084
_credentials_base_url = (
9185
langsmith_base_url
9286
or os.getenv("LANGSMITH_BASE_URL")
9387
or "https://api.smith.langchain.com"
9488
)
95-
if _credentials_base_url is None:
96-
raise Exception(
97-
"Invalid Langsmith API Key given. _credentials_base_url=None."
98-
)
9989

10090
return LangsmithCredentialsObject(
10191
LANGSMITH_API_KEY=_credentials_api_key,
@@ -200,12 +190,7 @@ def _prepare_log_data(
200190

201191
def log_success_event(self, kwargs, response_obj, start_time, end_time):
202192
try:
203-
sampling_rate = (
204-
float(os.getenv("LANGSMITH_SAMPLING_RATE")) # type: ignore
205-
if os.getenv("LANGSMITH_SAMPLING_RATE") is not None
206-
and os.getenv("LANGSMITH_SAMPLING_RATE").strip().isdigit() # type: ignore
207-
else 1.0
208-
)
193+
sampling_rate = self._get_sampling_rate_to_use_for_request(kwargs=kwargs)
209194
random_sample = random.random()
210195
if random_sample > sampling_rate:
211196
verbose_logger.info(
@@ -219,6 +204,7 @@ def log_success_event(self, kwargs, response_obj, start_time, end_time):
219204
kwargs,
220205
response_obj,
221206
)
207+
222208
credentials = self._get_credentials_to_use_for_request(kwargs=kwargs)
223209
data = self._prepare_log_data(
224210
kwargs=kwargs,
@@ -245,7 +231,7 @@ def log_success_event(self, kwargs, response_obj, start_time, end_time):
245231

246232
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
247233
try:
248-
sampling_rate = self.sampling_rate
234+
sampling_rate = self._get_sampling_rate_to_use_for_request(kwargs=kwargs)
249235
random_sample = random.random()
250236
if random_sample > sampling_rate:
251237
verbose_logger.info(
@@ -286,7 +272,7 @@ async def async_log_success_event(self, kwargs, response_obj, start_time, end_ti
286272
)
287273

288274
async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
289-
sampling_rate = self.sampling_rate
275+
sampling_rate = self._get_sampling_rate_to_use_for_request(kwargs=kwargs)
290276
random_sample = random.random()
291277
if random_sample > sampling_rate:
292278
verbose_logger.info(
@@ -417,6 +403,17 @@ def _group_batches_by_credentials(self) -> Dict[CredentialsKey, BatchGroup]:
417403

418404
for queue_object in self.log_queue:
419405
credentials = queue_object["credentials"]
406+
# if credential missing, skip - log warning
407+
if (
408+
credentials["LANGSMITH_API_KEY"] is None
409+
or credentials["LANGSMITH_PROJECT"] is None
410+
):
411+
verbose_logger.warning(
412+
"Langsmith Logging - credentials missing - api_key: %s, project: %s",
413+
credentials["LANGSMITH_API_KEY"],
414+
credentials["LANGSMITH_PROJECT"],
415+
)
416+
continue
420417
key = CredentialsKey(
421418
api_key=credentials["LANGSMITH_API_KEY"],
422419
project=credentials["LANGSMITH_PROJECT"],
@@ -432,6 +429,19 @@ def _group_batches_by_credentials(self) -> Dict[CredentialsKey, BatchGroup]:
432429

433430
return log_queue_by_credentials
434431

432+
def _get_sampling_rate_to_use_for_request(self, kwargs: Dict[str, Any]) -> float:
433+
standard_callback_dynamic_params: Optional[StandardCallbackDynamicParams] = (
434+
kwargs.get("standard_callback_dynamic_params", None)
435+
)
436+
sampling_rate: float = self.sampling_rate
437+
if standard_callback_dynamic_params is not None:
438+
_sampling_rate = standard_callback_dynamic_params.get(
439+
"langsmith_sampling_rate"
440+
)
441+
if _sampling_rate is not None:
442+
sampling_rate = float(_sampling_rate)
443+
return sampling_rate
444+
435445
def _get_credentials_to_use_for_request(
436446
self, kwargs: Dict[str, Any]
437447
) -> LangsmithCredentialsObject:
@@ -442,9 +452,9 @@ def _get_credentials_to_use_for_request(
442452
443453
Otherwise, use the default credentials.
444454
"""
445-
standard_callback_dynamic_params: Optional[
446-
StandardCallbackDynamicParams
447-
] = kwargs.get("standard_callback_dynamic_params", None)
455+
standard_callback_dynamic_params: Optional[StandardCallbackDynamicParams] = (
456+
kwargs.get("standard_callback_dynamic_params", None)
457+
)
448458
if standard_callback_dynamic_params is not None:
449459
credentials = self.get_credentials_from_env(
450460
langsmith_api_key=standard_callback_dynamic_params.get(

0 commit comments

Comments
 (0)