Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
## 1.1.0

- **Feature**: Embedding with OpenAI (or Azure OpenAI) can now trust a custom certificate authority when the environment variable `REQUESTS_CA_BUNDLE` is set.
## 1.0.57

- **test: Longer interval for pinecone integration tests**
Expand Down
6 changes: 6 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -210,3 +210,9 @@ fail_under = 0

[tool.hatch.build.targets.sdist]
packages = ["/unstructured_ingest"]

[[tool.uv.index]]
name = "PyPi"
url = "https://pypi.org/simple"
explicit = false
default = true
1 change: 1 addition & 0 deletions requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ pydantic>=2.7
tqdm
click
opentelemetry-sdk
certifi>=2025.7.14
2 changes: 1 addition & 1 deletion unstructured_ingest/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "1.0.57" # pragma: no cover
__version__ = "1.1.0" # pragma: no cover
9 changes: 7 additions & 2 deletions unstructured_ingest/embed/azure_openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
OpenAIEmbeddingEncoder,
)
from unstructured_ingest.utils.dep_check import requires_dependencies
from unstructured_ingest.utils.tls import ssl_context_with_optional_ca_override

if TYPE_CHECKING:
from openai import AsyncAzureOpenAI, AzureOpenAI
Expand All @@ -23,19 +24,23 @@ class AzureOpenAIEmbeddingConfig(OpenAIEmbeddingConfig):

@requires_dependencies(["openai"], extras="openai")
def get_client(self) -> "AzureOpenAI":
from openai import AzureOpenAI
from openai import AzureOpenAI, DefaultHttpxClient

client = DefaultHttpxClient(verify=ssl_context_with_optional_ca_override())
return AzureOpenAI(
http_client=client,
api_key=self.api_key.get_secret_value(),
api_version=self.api_version,
azure_endpoint=self.azure_endpoint,
)

@requires_dependencies(["openai"], extras="openai")
def get_async_client(self) -> "AsyncAzureOpenAI":
from openai import AsyncAzureOpenAI
from openai import AsyncAzureOpenAI, DefaultAsyncHttpxClient

client = DefaultAsyncHttpxClient(verify=ssl_context_with_optional_ca_override())
return AsyncAzureOpenAI(
http_client=client,
api_key=self.api_key.get_secret_value(),
api_version=self.api_version,
azure_endpoint=self.azure_endpoint,
Expand Down
15 changes: 11 additions & 4 deletions unstructured_ingest/embed/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
)
from unstructured_ingest.logger import logger
from unstructured_ingest.utils.dep_check import requires_dependencies
from unstructured_ingest.utils.tls import ssl_context_with_optional_ca_override

if TYPE_CHECKING:
from openai import AsyncOpenAI, OpenAI
Expand Down Expand Up @@ -86,15 +87,21 @@ def run_precheck(self) -> None:

@requires_dependencies(["openai"], extras="openai")
def get_client(self) -> "OpenAI":
from openai import OpenAI
from openai import DefaultHttpxClient, OpenAI

return OpenAI(api_key=self.api_key.get_secret_value(), base_url=self.base_url)
client = DefaultHttpxClient(verify=ssl_context_with_optional_ca_override())
return OpenAI(
api_key=self.api_key.get_secret_value(), http_client=client, base_url=self.base_url
)

@requires_dependencies(["openai"], extras="openai")
def get_async_client(self) -> "AsyncOpenAI":
from openai import AsyncOpenAI
from openai import AsyncOpenAI, DefaultAsyncHttpxClient

return AsyncOpenAI(api_key=self.api_key.get_secret_value(), base_url=self.base_url)
client = DefaultAsyncHttpxClient(verify=ssl_context_with_optional_ca_override())
return AsyncOpenAI(
api_key=self.api_key.get_secret_value(), http_client=client, base_url=self.base_url
)


@dataclass
Expand Down
15 changes: 15 additions & 0 deletions unstructured_ingest/utils/tls.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import os
import ssl

import certifi


def ssl_context_with_optional_ca_override() -> ssl.SSLContext:
    """Build an SSL context that honours the ``REQUESTS_CA_BUNDLE`` override.

    ``REQUESTS_CA_BUNDLE`` is used (rather than ``SSL_CERT_FILE`` /
    ``SSL_CERT_DIR``) because many other Python packages already respect it.
    See https://www.python-httpx.org/advanced/ssl/#working-with-ssl_cert_file-and-ssl_cert_dir

    Resolution order:

    * variable unset or empty -> trust the CA bundle shipped with ``certifi``;
    * variable points at a directory -> use it as ``capath``;
    * otherwise -> use it as ``cafile`` (a missing or unreadable file raises
      from ``ssl.create_default_context`` so misconfiguration is not silent).

    Returns:
        An ``ssl.SSLContext`` suitable for passing as ``verify=`` to an httpx
        client.
    """
    ca_override = os.environ.get("REQUESTS_CA_BUNDLE")

    # An empty value is treated the same as "not set"; passing "" through as a
    # file name would only produce a confusing load error.
    if not ca_override:
        return ssl.create_default_context(cafile=certifi.where())

    # The same variable may name either a bundle file or a hashed certificate
    # directory. Handing a directory to ``cafile`` fails, so pick the matching
    # keyword instead of passing the value to both.
    if os.path.isdir(ca_override):
        return ssl.create_default_context(capath=ca_override)

    return ssl.create_default_context(cafile=ca_override)
4,286 changes: 2,391 additions & 1,895 deletions uv.lock

Large diffs are not rendered by default.