Skip to content

Commit 45ad913

Browse files
authored
Tests: add langchain azure blob doc loader (#180)
1 parent b1cbdae commit 45ad913

File tree

10 files changed

+52
-3
lines changed

10 files changed

+52
-3
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ jobs:
120120
OPEN_AI_KEY: "${{ secrets.E2E_TESTS_OPEN_AI_KEY }}"
121121
AZURE_OPEN_AI_KEY: "${{ secrets.E2E_TESTS_AZURE_OPEN_AI_KEY }}"
122122
AZURE_OPEN_AI_ENDPOINT: "${{ secrets.E2E_TESTS_AZURE_OPEN_AI_ENDPOINT }}"
123+
AZURE_BLOB_STORAGE_CONNECTION_STRING: "${{ secrets.E2E_TESTS_AZURE_BLOB_STORAGE_CONNECTION_STRING }}"
123124
GCLOUD_ACCOUNT_KEY_JSON: "${{ secrets.E2E_TESTS_GCLOUD_ACCOUNT_KEY_JSON }}"
124125
GOOGLE_API_KEY: "${{ secrets.E2E_TESTS_GOOGLE_API_KEY }}"
125126
AWS_ACCESS_KEY_ID: "${{ secrets.E2E_TESTS_AWS_ACCESS_KEY_ID }}"

.github/workflows/langchain-master-daily.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ jobs:
6262
OPEN_AI_KEY: "${{ secrets.E2E_TESTS_OPEN_AI_KEY }}"
6363
AZURE_OPEN_AI_KEY: "${{ secrets.E2E_TESTS_AZURE_OPEN_AI_KEY }}"
6464
AZURE_OPEN_AI_ENDPOINT: "${{ secrets.E2E_TESTS_AZURE_OPEN_AI_ENDPOINT }}"
65+
AZURE_BLOB_STORAGE_CONNECTION_STRING: "${{ secrets.E2E_TESTS_AZURE_BLOB_STORAGE_CONNECTION_STRING }}"
6566
GCLOUD_ACCOUNT_KEY_JSON: "${{ secrets.E2E_TESTS_GCLOUD_ACCOUNT_KEY_JSON }}"
6667
GOOGLE_API_KEY: "${{ secrets.E2E_TESTS_GOOGLE_API_KEY }}"
6768
AWS_ACCESS_KEY_ID: "${{ secrets.E2E_TESTS_AWS_ACCESS_KEY_ID }}"

.github/workflows/llamaindex-main-daily.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ jobs:
6262
OPEN_AI_KEY: "${{ secrets.E2E_TESTS_OPEN_AI_KEY }}"
6363
AZURE_OPEN_AI_KEY: "${{ secrets.E2E_TESTS_AZURE_OPEN_AI_KEY }}"
6464
AZURE_OPEN_AI_ENDPOINT: "${{ secrets.E2E_TESTS_AZURE_OPEN_AI_ENDPOINT }}"
65+
AZURE_BLOB_STORAGE_CONNECTION_STRING: "${{ secrets.E2E_TESTS_AZURE_BLOB_STORAGE_CONNECTION_STRING }}"
6566
GCLOUD_ACCOUNT_KEY_JSON: "${{ secrets.E2E_TESTS_GCLOUD_ACCOUNT_KEY_JSON }}"
6667
GOOGLE_API_KEY: "${{ secrets.E2E_TESTS_GOOGLE_API_KEY }}"
6768
AWS_ACCESS_KEY_ID: "${{ secrets.E2E_TESTS_AWS_ACCESS_KEY_ID }}"

.github/workflows/ragstack-ai-latest.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ jobs:
5656
OPEN_AI_KEY: "${{ secrets.E2E_TESTS_OPEN_AI_KEY }}"
5757
AZURE_OPEN_AI_KEY: "${{ secrets.E2E_TESTS_AZURE_OPEN_AI_KEY }}"
5858
AZURE_OPEN_AI_ENDPOINT: "${{ secrets.E2E_TESTS_AZURE_OPEN_AI_ENDPOINT }}"
59+
AZURE_BLOB_STORAGE_CONNECTION_STRING: "${{ secrets.E2E_TESTS_AZURE_BLOB_STORAGE_CONNECTION_STRING }}"
5960
GCLOUD_ACCOUNT_KEY_JSON: "${{ secrets.E2E_TESTS_GCLOUD_ACCOUNT_KEY_JSON }}"
6061
GOOGLE_API_KEY: "${{ secrets.E2E_TESTS_GOOGLE_API_KEY }}"
6162
AWS_ACCESS_KEY_ID: "${{ secrets.E2E_TESTS_AWS_ACCESS_KEY_ID }}"

ragstack-e2e-tests/e2e_tests/langchain/test_document_loaders.py

Lines changed: 42 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
11
import io
2+
import os
23
import tempfile
34
import uuid
45
from urllib.parse import urlparse
56

67
import boto3
7-
from e2e_tests.conftest import (
8-
set_current_test_info,
9-
)
8+
from azure.storage.blob import ContainerClient
9+
from e2e_tests.conftest import set_current_test_info, get_required_env
1010

1111
from langchain.document_loaders import CSVLoader, WebBaseLoader, S3DirectoryLoader
12+
from langchain_community.document_loaders import AzureBlobStorageContainerLoader
1213

1314

1415
def set_current_test_info_document_loader(doc_loader: str):
@@ -87,3 +88,41 @@ def test_s3_loader():
8788
finally:
8889
s3_obj.delete()
8990
bucket.delete()
91+
92+
93+
def test_azure_blob_doc_loader():
    """Round-trip one text blob through ``AzureBlobStorageContainerLoader``.

    The test creates a uniquely named container (``ragstack-ci-<uuid4>``),
    uploads a single blob ``data.txt``, loads the container with the
    LangChain loader and checks that exactly one document comes back with
    the uploaded content and a ``source`` metadata entry whose basename is
    the blob name. The blob and the container are deleted afterwards.

    The connection string is read from ``AZURE_BLOB_STORAGE_CONNECTION_STRING``
    through ``get_required_env``.
    NOTE(review): presumably that helper fails or skips the test when the
    variable is unset - confirm against ``conftest``.
    """
    set_current_test_info_document_loader("azure")
    from azure.storage.blob import BlobClient

    connection_string = get_required_env("AZURE_BLOB_STORAGE_CONNECTION_STRING")
    container_name = f"ragstack-ci-{uuid.uuid4()}"
    blob_name = "data.txt"
    blob_content = b"test data"

    container_client = ContainerClient.from_connection_string(
        conn_str=connection_string, container_name=container_name
    )
    # Create the container *before* entering the try block: if creation
    # fails there is nothing to clean up, and calling delete_container() on
    # a container that was never created would hide the original error.
    container_client.create_container()
    try:
        blob_client = BlobClient.from_connection_string(
            conn_str=connection_string,
            container_name=container_name,
            blob_name=blob_name,
        )
        # Same reasoning as above: only schedule the blob deletion once the
        # upload has actually succeeded.
        blob_client.upload_blob(io.BytesIO(blob_content))
        try:
            loader = AzureBlobStorageContainerLoader(
                conn_str=connection_string, container=container_name
            )
            docs = loader.load()

            # Guard against a vacuous pass: looping over an empty result
            # would execute no assertions at all.
            assert len(docs) == 1, f"expected exactly one document, got {len(docs)}"
            doc = docs[0]
            assert doc.page_content == blob_content.decode()
            # Surface the metadata in the failure message instead of
            # printing it unconditionally.
            assert os.path.basename(doc.metadata["source"]) == blob_name, doc.metadata
        finally:
            blob_client.delete_blob()
    finally:
        container_client.delete_container()

ragstack-e2e-tests/pyproject.langchain.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ langchain-google-genai = "^0.0.4"
1717
langchain-nvidia-ai-endpoints = "^0.0.1"
1818
boto3 = "^1.29.6"
1919
huggingface-hub = "^0.19.4"
20+
azure-storage-blob = "^12.19.0"
2021

2122
# From LangChain optional deps, needed by WebBaseLoader
2223
beautifulsoup4 = "^4"

ragstack-e2e-tests/pyproject.llamaindex.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ langchain-google-genai = "^0.0.4"
1717
langchain-nvidia-ai-endpoints = "^0.0.1"
1818
boto3 = "^1.29.6"
1919
huggingface-hub = "^0.19.4"
20+
azure-storage-blob = "^12.19.0"
2021

2122
# From LangChain optional deps, needed by WebBaseLoader
2223
beautifulsoup4 = "^4"

ragstack-e2e-tests/pyproject.ragstack-ai.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ langchain-google-genai = "^0.0.4"
1717
langchain-nvidia-ai-endpoints = "^0.0.1"
1818
boto3 = "^1.29.6"
1919
huggingface-hub = "^0.19.4"
20+
azure-storage-blob = "^12.19.0"
2021

2122
# From LangChain optional deps, needed by WebBaseLoader
2223
beautifulsoup4 = "^4"

ragstack-e2e-tests/pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ authors = ["DataStax"]
88
[tool.poetry.dependencies]
99
python = ">=3.9,<3.12"
1010

11+
1112
[tool.poetry.group.test.dependencies]
1213
pytest = "*"
1314
black = "*"
@@ -17,6 +18,7 @@ langchain-google-genai = "^0.0.4"
1718
langchain-nvidia-ai-endpoints = "^0.0.1"
1819
boto3 = "^1.29.6"
1920
huggingface-hub = "^0.19.4"
21+
azure-storage-blob = "^12.19.0"
2022

2123
# From LangChain optional deps, needed by WebBaseLoader
2224
beautifulsoup4 = "^4"

ragstack-e2e-tests/tox.ini

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ pass_env =
1212
OPEN_AI_KEY
1313
AZURE_OPEN_AI_KEY
1414
AZURE_OPEN_AI_ENDPOINT
15+
AZURE_BLOB_STORAGE_CONNECTION_STRING
1516
GCLOUD_ACCOUNT_KEY_JSON
1617
GOOGLE_API_KEY
1718
AWS_ACCESS_KEY_ID

0 commit comments

Comments
 (0)