Skip to content

Commit 2bee7d0

Browse files
authored
bugfix/allow for optional API keys in embedders (#479)
* allow for optional api keys in embedders
* upgrade deps
* fix import
* pin ES
1 parent 692ecf5 commit 2bee7d0

File tree

9 files changed

+1107
-923
lines changed

9 files changed

+1107
-923
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
## 1.0.17
2+
3+
* **Support optional API keys for embedders**
4+
15
## 1.0.16
26

37
* **Add embedder config field descriptions**
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
elasticsearch[async]
1+
elasticsearch[async]<9.0.0

unstructured_ingest/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "1.0.16" # pragma: no cover
1+
__version__ = "1.0.17" # pragma: no cover

unstructured_ingest/embed/azure_openai.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,9 @@ class AzureOpenAIEmbeddingConfig(OpenAIEmbeddingConfig):
2525
def get_client(self) -> "AzureOpenAI":
2626
from openai import AzureOpenAI
2727

28+
api_key = self.api_key.get_secret_value() if self.api_key else None
2829
return AzureOpenAI(
29-
api_key=self.api_key.get_secret_value(),
30+
api_key=api_key,
3031
api_version=self.api_version,
3132
azure_endpoint=self.azure_endpoint,
3233
)
@@ -35,8 +36,9 @@ def get_client(self) -> "AzureOpenAI":
3536
def get_async_client(self) -> "AsyncAzureOpenAI":
3637
from openai import AsyncAzureOpenAI
3738

39+
api_key = self.api_key.get_secret_value() if self.api_key else None
3840
return AsyncAzureOpenAI(
39-
api_key=self.api_key.get_secret_value(),
41+
api_key=api_key,
4042
api_version=self.api_version,
4143
azure_endpoint=self.azure_endpoint,
4244
)

unstructured_ingest/embed/octoai.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from dataclasses import dataclass
2-
from typing import TYPE_CHECKING
2+
from typing import TYPE_CHECKING, Optional
33

44
from pydantic import Field, SecretStr
55

@@ -24,7 +24,7 @@
2424

2525

2626
class OctoAiEmbeddingConfig(EmbeddingConfig):
27-
api_key: SecretStr
27+
api_key: Optional[SecretStr] = Field(description="API key for OctoAI", default=None)
2828
embedder_model_name: str = Field(
2929
default="thenlper/gte-large", alias="model_name", description="octoai model name"
3030
)
@@ -77,7 +77,8 @@ def get_client(self) -> "OpenAI":
7777
"""Creates an OpenAI python client to embed elements. Uses the OpenAI SDK."""
7878
from openai import OpenAI
7979

80-
return OpenAI(api_key=self.api_key.get_secret_value(), base_url=self.base_url)
80+
api_key = self.api_key.get_secret_value() if self.api_key else None
81+
return OpenAI(api_key=api_key, base_url=self.base_url)
8182

8283
@requires_dependencies(
8384
["openai", "tiktoken"],
@@ -87,7 +88,8 @@ def get_async_client(self) -> "AsyncOpenAI":
8788
"""Creates an OpenAI python client to embed elements. Uses the OpenAI SDK."""
8889
from openai import AsyncOpenAI
8990

90-
return AsyncOpenAI(api_key=self.api_key.get_secret_value(), base_url=self.base_url)
91+
api_key = self.api_key.get_secret_value() if self.api_key else None
92+
return AsyncOpenAI(api_key=api_key, base_url=self.base_url)
9193

9294

9395
@dataclass

unstructured_ingest/embed/openai.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424

2525

2626
class OpenAIEmbeddingConfig(EmbeddingConfig):
27-
api_key: SecretStr = Field(description="API key for OpenAI")
27+
api_key: Optional[SecretStr] = Field(description="API key for OpenAI", default=None)
2828
embedder_model_name: str = Field(
2929
default="text-embedding-ada-002", alias="model_name", description="OpenAI model name"
3030
)
@@ -88,13 +88,15 @@ def run_precheck(self) -> None:
8888
def get_client(self) -> "OpenAI":
8989
from openai import OpenAI
9090

91-
return OpenAI(api_key=self.api_key.get_secret_value(), base_url=self.base_url)
91+
api_key = self.api_key.get_secret_value() if self.api_key else None
92+
return OpenAI(api_key=api_key, base_url=self.base_url)
9293

9394
@requires_dependencies(["openai"], extras="openai")
9495
def get_async_client(self) -> "AsyncOpenAI":
9596
from openai import AsyncOpenAI
9697

97-
return AsyncOpenAI(api_key=self.api_key.get_secret_value(), base_url=self.base_url)
98+
api_key = self.api_key.get_secret_value() if self.api_key else None
99+
return AsyncOpenAI(api_key=api_key, base_url=self.base_url)
98100

99101

100102
@dataclass

unstructured_ingest/embed/togetherai.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from dataclasses import dataclass
2-
from typing import TYPE_CHECKING, Any
2+
from typing import TYPE_CHECKING, Any, Optional
33

44
from pydantic import Field, SecretStr
55

@@ -20,7 +20,7 @@
2020

2121

2222
class TogetherAIEmbeddingConfig(EmbeddingConfig):
23-
api_key: SecretStr = Field(description="API key for Together AI")
23+
api_key: Optional[SecretStr] = Field(description="API key for Together AI", default=None)
2424
embedder_model_name: str = Field(
2525
default="togethercomputer/m2-bert-80M-8k-retrieval",
2626
alias="model_name",
@@ -58,13 +58,15 @@ def run_precheck(self) -> None:
5858
def get_client(self) -> "Together":
5959
from together import Together
6060

61-
return Together(api_key=self.api_key.get_secret_value())
61+
api_key = self.api_key.get_secret_value() if self.api_key else None
62+
return Together(api_key=api_key)
6263

6364
@requires_dependencies(["together"], extras="togetherai")
6465
def get_async_client(self) -> "AsyncTogether":
6566
from together import AsyncTogether
6667

67-
return AsyncTogether(api_key=self.api_key.get_secret_value())
68+
api_key = self.api_key.get_secret_value() if self.api_key else None
69+
return AsyncTogether(api_key=api_key)
6870

6971

7072
@dataclass

unstructured_ingest/embed/voyageai.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ class VoyageAIEmbeddingConfig(EmbeddingConfig):
2626
le=128,
2727
description="Batch size for embedding requests. VoyageAI has a limit of 128.",
2828
)
29-
api_key: SecretStr = Field(description="API key for VoyageAI")
29+
api_key: Optional[SecretStr] = Field(description="API key for VoyageAI", default=None)
3030
embedder_model_name: str = Field(
3131
default="voyage-3", alias="model_name", description="VoyageAI model name"
3232
)
@@ -65,8 +65,9 @@ def get_client(self) -> "VoyageAIClient":
6565
"""Creates a VoyageAI python client to embed elements."""
6666
from voyageai import Client as VoyageAIClient
6767

68+
api_key = self.api_key.get_secret_value() if self.api_key else None
6869
client = VoyageAIClient(
69-
api_key=self.api_key.get_secret_value(),
70+
api_key=api_key,
7071
max_retries=self.max_retries,
7172
timeout=self.timeout_in_seconds,
7273
)
@@ -80,8 +81,9 @@ def get_async_client(self) -> "AsyncVoyageAIClient":
8081
"""Creates a VoyageAI python client to embed elements."""
8182
from voyageai import AsyncClient as AsyncVoyageAIClient
8283

84+
api_key = self.api_key.get_secret_value() if self.api_key else None
8385
client = AsyncVoyageAIClient(
84-
api_key=self.api_key.get_secret_value(),
86+
api_key=api_key,
8587
max_retries=self.max_retries,
8688
timeout=self.timeout_in_seconds,
8789
)

0 commit comments

Comments
 (0)