@@ -16,6 +16,7 @@ class EmbedderConfig(BaseModel):
1616 embedding_provider : Optional [
1717 Literal [
1818 "openai" ,
19+ "azure-openai" ,
1920 "huggingface" ,
2021 "aws-bedrock" ,
2122 "vertexai" ,
@@ -43,6 +44,14 @@ class EmbedderConfig(BaseModel):
4344 embedding_aws_region : Optional [str ] = Field (
4445 default = "us-west-2" , description = "AWS region used for AWS-based embedders, such as bedrock"
4546 )
47+ embedding_azure_endpoint : Optional [str ] = Field (
48+ default = None ,
49+ description = "Your Azure endpoint, including the resource, "
50+ "e.g. `https://example-resource.azure.openai.com/`" ,
51+ )
52+ embedding_azure_api_version : Optional [str ] = Field (
53+ description = "Azure API version" , default = None
54+ )
4655
4756 def get_huggingface_embedder (self , embedding_kwargs : dict ) -> "BaseEmbeddingEncoder" :
4857 from unstructured_ingest .embed .huggingface import (
@@ -59,6 +68,25 @@ def get_openai_embedder(self, embedding_kwargs: dict) -> "BaseEmbeddingEncoder":
5968
6069 return OpenAIEmbeddingEncoder (config = OpenAIEmbeddingConfig .model_validate (embedding_kwargs ))
6170
def get_azure_openai_embedder(self, embedding_kwargs: dict) -> "BaseEmbeddingEncoder":
    """Build an Azure OpenAI embedding encoder from this config's fields.

    The encoder config always receives ``api_key`` and ``azure_endpoint``
    (even when they are ``None``); ``api_version`` and ``model_name`` are
    only forwarded when the corresponding field is truthy.

    Args:
        embedding_kwargs: Accepted so the signature matches the other
            ``get_*_embedder`` methods; this method does not read it.

    Returns:
        An ``AzureOpenAIEmbeddingEncoder`` wrapping the validated config.
    """
    # Imported lazily, like the sibling embedder factories do.
    from unstructured_ingest.embed.azure_openai import (
        AzureOpenAIEmbeddingConfig,
        AzureOpenAIEmbeddingEncoder,
    )

    settings = {
        "api_key": self.embedding_api_key,
        "azure_endpoint": self.embedding_azure_endpoint,
    }
    optional_settings = (
        ("api_version", self.embedding_azure_api_version),
        ("model_name", self.embedding_model_name),
    )
    # Drop unset/empty optional values so they are not passed to validation.
    settings.update((key, value) for key, value in optional_settings if value)

    encoder_config = AzureOpenAIEmbeddingConfig.model_validate(settings)
    return AzureOpenAIEmbeddingEncoder(config=encoder_config)
89+
6290 def get_octoai_embedder (self , embedding_kwargs : dict ) -> "BaseEmbeddingEncoder" :
6391 from unstructured_ingest .embed .octoai import OctoAiEmbeddingConfig , OctoAIEmbeddingEncoder
6492
@@ -146,6 +174,8 @@ def get_embedder(self) -> "BaseEmbeddingEncoder":
146174 return self .get_mixedbread_embedder (embedding_kwargs = kwargs )
147175 if self .embedding_provider == "togetherai" :
148176 return self .get_togetherai_embedder (embedding_kwargs = kwargs )
177+ if self .embedding_provider == "azure-openai" :
178+ return self .get_azure_openai_embedder (embedding_kwargs = kwargs )
149179
150180 raise ValueError (f"{ self .embedding_provider } not a recognized encoder" )
151181
0 commit comments