Skip to content

Commit 616f17a

Browse files
authored
updated unstructured items to use custom API url (#310)
* updated unstructured items to use custom API url
* lint
* fix param
1 parent a646b3e commit 616f17a

File tree

7 files changed

+13
-0
lines changed

7 files changed

+13
-0
lines changed

.github/workflows/_run_e2e_tests.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ jobs:
9191
GCLOUD_ACCOUNT_KEY_JSON: "${{ secrets.E2E_TESTS_GCLOUD_ACCOUNT_KEY_JSON }}"
9292
NVIDIA_API_KEY: "${{ secrets.E2E_TESTS_NVIDIA_API_KEY }}"
9393
UNSTRUCTURED_API_KEY: "${{ secrets.E2E_TESTS_UNSTRUCTURED_API_KEY }}"
94+
UNSTRUCTURED_API_URL: "${{ secrets.E2E_TESTS_UNSTRUCTURED_API_URL }}"
9495
run: |
9596
source scripts/ci-common-env.sh
9697
tox -e notebooks
@@ -118,6 +119,7 @@ jobs:
118119
LANGCHAIN_API_KEY: "${{ secrets.E2E_TESTS_LANGCHAIN_API_KEY }}"
119120
LLAMA_CLOUD_API_KEY: "${{ secrets.E2E_TESTS_LLAMA_CLOUD_API_KEY }}"
120121
UNSTRUCTURED_API_KEY: "${{ secrets.E2E_TESTS_UNSTRUCTURED_API_KEY }}"
122+
UNSTRUCTURED_API_URL: "${{ secrets.E2E_TESTS_UNSTRUCTURED_API_URL }}"
121123
run: |
122124
source scripts/ci-common-env.sh
123125
if [ "${{ inputs.suite-name == 'ragstack' }}" == "true" ]; then

docs/modules/examples/pages/langchain-unstructured-astra.adoc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ Create a `.env` file in your application with the following environment variable
3838
[source,bash]
3939
----
4040
UNSTRUCTURED_API_KEY=...
41+
UNSTRUCTURED_API_URL=https://api.unstructured.io/general/v0/general
4142
ASTRA_DB_API_ENDPOINT=https://<ASTRA_DB_ID>-<ASTRA_DB_REGION>.apps.astra.datastax.com
4243
ASTRA_DB_APPLICATION_TOKEN=AstraCS:...
4344
OPENAI_API_KEY=sk-...
@@ -103,6 +104,7 @@ This works well if your document doesn't contain any complex formatting or table
103104
loader = UnstructuredAPIFileLoader(
104105
file_path="./attention_pages_9_10.pdf",
105106
api_key=os.getenv("UNSTRUCTURED_API_KEY"),
107+
url = os.getenv("UNSTRUCTURED_API_URL"),
106108
)
107109
simple_docs = loader.load()
108110
@@ -125,6 +127,7 @@ A list of all the different element types can be found here: https://unstructure
125127
elements = unstructured.get_elements_from_api(
126128
file_path="./attention_pages_9_10.pdf",
127129
api_key=os.getenv("UNSTRUCTURED_API_KEY"),
130+
api_url=os.getenv("UNSTRUCTURED_API_URL"),
128131
strategy="hi_res", # default "auto"
129132
pdf_infer_table_structure=True,
130133
)
@@ -262,6 +265,7 @@ else:
262265
elements = unstructured.get_elements_from_api(
263266
file_path="./attention_pages_9_10.pdf",
264267
api_key=os.getenv("UNSTRUCTURED_API_KEY"),
268+
api_url=os.getenv("UNSTRUCTURED_API_URL"),
265269
strategy="hi_res", # default "auto"
266270
pdf_infer_table_structure=True,
267271
)

examples/notebooks/langchain-unstructured-astra.ipynb

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@
7272
"from getpass import getpass\n",
7373
"\n",
7474
"os.environ[\"UNSTRUCTURED_API_KEY\"] = getpass(\"Enter your Unstructured API Key:\")\n",
75+
"os.environ[\"UNSTRUCTURED_API_URL\"] = getpass(\"Enter your Unstructured API URL:\")\n",
7576
"os.environ[\"ASTRA_DB_ENDPOINT\"] = input(\"Enter you Astra DB API Endpoint: \")\n",
7677
"os.environ[\"ASTRA_DB_TOKEN\"] = getpass(\"Enter you Astra DB Token: \")\n",
7778
"os.environ[\"OPENAI_API_KEY\"] = getpass(\"Enter your OpenAI API Key: \")"
@@ -123,6 +124,7 @@
123124
"loader = UnstructuredAPIFileLoader(\n",
124125
" file_path=\"./resources/attention_pages_9_10.pdf\",\n",
125126
" api_key=os.getenv(\"UNSTRUCTURED_API_KEY\"),\n",
127+
" url = os.getenv(\"UNSTRUCTURED_API_URL\"),\n",
126128
")\n",
127129
"simple_docs = loader.load()\n",
128130
"len(simple_docs)"
@@ -222,6 +224,7 @@
222224
"elements = unstructured.get_elements_from_api(\n",
223225
" file_path=\"./resources/attention_pages_9_10.pdf\",\n",
224226
" api_key=os.getenv(\"UNSTRUCTURED_API_KEY\"),\n",
227+
" api_url = os.getenv(\"UNSTRUCTURED_API_URL\"),\n",
225228
" strategy=\"hi_res\", # default \"auto\"\n",
226229
" pdf_infer_table_structure=True,\n",
227230
")\n",

ragstack-e2e-tests/.env.template

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,3 +36,4 @@ VECTOR_DATABASE_TYPE=astradb
3636

3737
# Unstructured.io
3838
# UNSTRUCTURED_API_KEY=
39+
# UNSTRUCTURED_API_URL=

ragstack-e2e-tests/e2e_tests/langchain/test_unstructured.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ def test_unstructured_api(vector_store, unstructured_mode, request):
5959
mode=unstructured_mode,
6060
strategy="auto",
6161
api_key=get_required_env("UNSTRUCTURED_API_KEY"),
62+
url=get_required_env("UNSTRUCTURED_API_URL"),
6263
)
6364

6465
splitter = TokenTextSplitter(chunk_size=512, chunk_overlap=0)

ragstack-e2e-tests/tox.ini

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ pass_env =
2828
LANGCHAIN_PROJECT
2929
LLAMA_CLOUD_API_KEY
3030
UNSTRUCTURED_API_KEY
31+
UNSTRUCTURED_API_URL
3132
deps =
3233
poetry
3334
commands =

tox.ini

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ pass_env =
2222
LLAMA_CLOUD_API_KEY
2323
NVIDIA_API_KEY
2424
UNSTRUCTURED_API_KEY
25+
UNSTRUCTURED_API_URL
2526
deps =
2627
pytest
2728
nbmake

0 commit comments

Comments
 (0)