Skip to content

Commit 692ecf5

Browse files
authored
add descriptions to all embedder config pydantic model fields (#478)
1 parent 1722a35 commit 692ecf5

File tree

12 files changed

+56
-26
lines changed

12 files changed

+56
-26
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,7 @@
1+
## 1.0.16
2+
3+
* **Add embedder config field descriptions**
4+
15
## 1.0.15
26

37
### Fixes

requirements/base.txt

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
python-dateutil
22
# Pydantic generic Secret only introduced in 2.7
33
pydantic>=2.7
4-
dataclasses_json
54
tqdm
65
click
76
opentelemetry-sdk

unstructured_ingest/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
__version__ = "1.0.15" # pragma: no cover
1+
__version__ = "1.0.16" # pragma: no cover

unstructured_ingest/embed/azure_openai.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16,8 +16,10 @@
1616

1717
class AzureOpenAIEmbeddingConfig(OpenAIEmbeddingConfig):
1818
api_version: str = Field(description="Azure API version", default="2024-06-01")
19-
azure_endpoint: str
20-
embedder_model_name: str = Field(default="text-embedding-ada-002", alias="model_name")
19+
azure_endpoint: str = Field(description="Azure endpoint")
20+
embedder_model_name: str = Field(
21+
default="text-embedding-ada-002", alias="model_name", description="Azure OpenAI model name"
22+
)
2123

2224
@requires_dependencies(["openai"], extras="openai")
2325
def get_client(self) -> "AzureOpenAI":

unstructured_ingest/embed/bedrock.py

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -58,10 +58,14 @@ def conform_query(query: str, provider: str) -> dict:
5858

5959

6060
class BedrockEmbeddingConfig(EmbeddingConfig):
61-
aws_access_key_id: SecretStr
62-
aws_secret_access_key: SecretStr
63-
region_name: str = "us-west-2"
64-
embedder_model_name: str = Field(default="amazon.titan-embed-text-v1", alias="model_name")
61+
aws_access_key_id: SecretStr = Field(description="aws access key id")
62+
aws_secret_access_key: SecretStr = Field(description="aws secret access key")
63+
region_name: str = Field(description="aws region name", default="us-west-2")
64+
embedder_model_name: str = Field(
65+
default="amazon.titan-embed-text-v1",
66+
alias="model_name",
67+
description="AWS Bedrock model name",
68+
)
6569

6670
def wrap_error(self, e: Exception) -> Exception:
6771
if is_internal_error(e=e):

unstructured_ingest/embed/huggingface.py

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -15,11 +15,18 @@
1515

1616

1717
class HuggingFaceEmbeddingConfig(EmbeddingConfig):
18-
embedder_model_name: Optional[str] = Field(default="all-MiniLM-L6-v2", alias="model_name")
18+
embedder_model_name: Optional[str] = Field(
19+
default="all-MiniLM-L6-v2", alias="model_name", description="HuggingFace model name"
20+
)
1921
embedder_model_kwargs: Optional[dict] = Field(
20-
default_factory=lambda: {"device": "cpu"}, alias="model_kwargs"
22+
default_factory=lambda: {"device": "cpu"},
23+
alias="model_kwargs",
24+
description="additional model parameters",
25+
)
26+
encode_kwargs: Optional[dict] = Field(
27+
default_factory=lambda: {"normalize_embeddings": False},
28+
description="additional embedding parameters",
2129
)
22-
encode_kwargs: Optional[dict] = Field(default_factory=lambda: {"normalize_embeddings": False})
2330

2431
@requires_dependencies(
2532
["sentence_transformers"],

unstructured_ingest/embed/mixedbreadai.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,10 +34,13 @@ class MixedbreadAIEmbeddingConfig(EmbeddingConfig):
3434

3535
api_key: SecretStr = Field(
3636
default_factory=lambda: SecretStr(os.environ.get("MXBAI_API_KEY")),
37+
description="API key for Mixedbread AI",
3738
)
3839

3940
embedder_model_name: str = Field(
40-
default="mixedbread-ai/mxbai-embed-large-v1", alias="model_name"
41+
default="mixedbread-ai/mxbai-embed-large-v1",
42+
alias="model_name",
43+
description="Mixedbread AI model name",
4144
)
4245

4346
@requires_dependencies(

unstructured_ingest/embed/octoai.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -25,8 +25,12 @@
2525

2626
class OctoAiEmbeddingConfig(EmbeddingConfig):
2727
api_key: SecretStr
28-
embedder_model_name: str = Field(default="thenlper/gte-large", alias="model_name")
29-
base_url: str = Field(default="https://text.octoai.run/v1")
28+
embedder_model_name: str = Field(
29+
default="thenlper/gte-large", alias="model_name", description="octoai model name"
30+
)
31+
base_url: str = Field(
32+
default="https://text.octoai.run/v1", description="optional override for the base url"
33+
)
3034

3135
def wrap_error(self, e: Exception) -> Exception:
3236
if is_internal_error(e=e):

unstructured_ingest/embed/openai.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -24,9 +24,11 @@
2424

2525

2626
class OpenAIEmbeddingConfig(EmbeddingConfig):
27-
api_key: SecretStr
28-
embedder_model_name: str = Field(default="text-embedding-ada-002", alias="model_name")
29-
base_url: Optional[str] = None
27+
api_key: SecretStr = Field(description="API key for OpenAI")
28+
embedder_model_name: str = Field(
29+
default="text-embedding-ada-002", alias="model_name", description="OpenAI model name"
30+
)
31+
base_url: Optional[str] = Field(default=None, description="optional override for the base url")
3032

3133
@requires_dependencies(["openai"], extras="openai")
3234
def wrap_error(self, e: Exception) -> Exception:

unstructured_ingest/embed/togetherai.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -20,9 +20,11 @@
2020

2121

2222
class TogetherAIEmbeddingConfig(EmbeddingConfig):
23-
api_key: SecretStr
23+
api_key: SecretStr = Field(description="API key for Together AI")
2424
embedder_model_name: str = Field(
25-
default="togethercomputer/m2-bert-80M-8k-retrieval", alias="model_name"
25+
default="togethercomputer/m2-bert-80M-8k-retrieval",
26+
alias="model_name",
27+
description="Together AI model name",
2628
)
2729

2830
def wrap_error(self, e: Exception) -> Exception:

0 commit comments

Comments
 (0)