Skip to content

Commit a990aa8

Browse files
Update Databricks CI test (#135)
1 parent 3cf0bf9 commit a990aa8

File tree

7 files changed

+34
-29
lines changed

7 files changed

+34
-29
lines changed

.github/workflows/e2e.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -172,9 +172,9 @@ jobs:
172172
ASTRA_DB_API_ENDPOINT: ${{secrets.ASTRA_DB_ENDPOINT}}
173173
CLARIFAI_API_KEY: ${{secrets.CLARIFAI_API_KEY}}
174174
DATABRICKS_HOST: ${{secrets.DATABRICKS_HOST}}
175-
DATABRICKS_USERNAME: ${{secrets.DATABRICKS_USERNAME}}
176-
DATABRICKS_PASSWORD: ${{secrets.DATABRICKS_PASSWORD}}
177175
DATABRICKS_CATALOG: ${{secrets.DATABRICKS_CATALOG}}
176+
DATABRICKS_CLIENT_ID: ${{secrets.DATABRICKS_CLIENT_ID}}
177+
DATABRICKS_CLIENT_SECRET: ${{secrets.DATABRICKS_CLIENT_SECRET}}
178178
SHAREPOINT_CLIENT_ID: ${{secrets.SHAREPOINT_CLIENT_ID}}
179179
SHAREPOINT_CRED: ${{secrets.SHAREPOINT_CRED}}
180180
KDBAI_BEARER_TOKEN: ${{ secrets.KDBAI_BEARER_TOKEN }}

CHANGELOG.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
1-
## 0.0.21-dev1
1+
## 0.0.21-dev2
22

33
### Fixes
44

5-
* **Fix forward compatibility issues with `unstructured-client==0.26.0`. Update syntax and create a new SDK util file for reuse in the Partitioner and Chunker
5+
* **Fix forward compatibility issues with `unstructured-client==0.26.0`.** Update syntax and create a new SDK util file for reuse in the Partitioner and Chunker
6+
7+
* **Update Databricks CI Test** Update to use client_id and client_secret auth. Also revert the `files.upload` method to the one from open source.
68

79
* **fix astra src bug**
810

test_e2e/dest/databricks-volumes.sh

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ source "$SCRIPT_DIR"/cleanup.sh
2323
function cleanup() {
2424
python "$SCRIPT_DIR"/python/test-databricks-volumes.py cleanup \
2525
--host "$DATABRICKS_HOST" \
26-
--username "$DATABRICKS_USERNAME" \
27-
--password "$DATABRICKS_PASSWORD" \
26+
--client-id "$DATABRICKS_CLIENT_ID" \
27+
--client-secret "$DATABRICKS_CLIENT_SECRET" \
2828
--volume "$DATABRICKS_VOLUME" \
2929
--catalog "$DATABRICKS_CATALOG" \
3030
--volume-path "$DATABRICKS_VOLUME_PATH"
@@ -48,16 +48,16 @@ PYTHONPATH=. ./unstructured_ingest/main.py \
4848
--work-dir "$WORK_DIR" \
4949
databricks-volumes \
5050
--host "$DATABRICKS_HOST" \
51-
--username "$DATABRICKS_USERNAME" \
52-
--password "$DATABRICKS_PASSWORD" \
51+
--client-id "$DATABRICKS_CLIENT_ID" \
52+
--client-secret "$DATABRICKS_CLIENT_SECRET" \
5353
--volume "$DATABRICKS_VOLUME" \
5454
--catalog "$DATABRICKS_CATALOG" \
5555
--volume-path "$DATABRICKS_VOLUME_PATH"
5656

5757
python "$SCRIPT_DIR"/python/test-databricks-volumes.py test \
5858
--host "$DATABRICKS_HOST" \
59-
--username "$DATABRICKS_USERNAME" \
60-
--password "$DATABRICKS_PASSWORD" \
59+
--client-id "$DATABRICKS_CLIENT_ID" \
60+
--client-secret "$DATABRICKS_CLIENT_SECRET" \
6161
--volume "$DATABRICKS_VOLUME" \
6262
--catalog "$DATABRICKS_CATALOG" \
6363
--volume-path "$DATABRICKS_VOLUME_PATH"

test_e2e/python/test-databricks-volumes.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,20 +16,20 @@ def _get_volume_path(catalog: str, volume: str, volume_path: str):
1616

1717
@cli.command()
1818
@click.option("--host", type=str, required=True)
19-
@click.option("--username", type=str, required=True)
20-
@click.option("--password", type=str, required=True)
19+
@click.option("--client-id", type=str, required=True)
20+
@click.option("--client-secret", type=str, required=True)
2121
@click.option("--catalog", type=str, required=True)
2222
@click.option("--volume", type=str, required=True)
2323
@click.option("--volume-path", type=str, required=True)
2424
def test(
2525
host: str,
26-
username: str,
27-
password: str,
26+
client_id: str,
27+
client_secret: str,
2828
catalog: str,
2929
volume: str,
3030
volume_path: str,
3131
):
32-
client = WorkspaceClient(host=host, username=username, password=password)
32+
client = WorkspaceClient(host=host, client_id=client_id, client_secret=client_secret)
3333
files = list(
3434
client.files.list_directory_contents(_get_volume_path(catalog, volume, volume_path))
3535
)
@@ -53,20 +53,20 @@ def test(
5353

5454
@cli.command()
5555
@click.option("--host", type=str, required=True)
56-
@click.option("--username", type=str, required=True)
57-
@click.option("--password", type=str, required=True)
56+
@click.option("--client-id", type=str, required=True)
57+
@click.option("--client-secret", type=str, required=True)
5858
@click.option("--catalog", type=str, required=True)
5959
@click.option("--volume", type=str, required=True)
6060
@click.option("--volume-path", type=str, required=True)
6161
def cleanup(
6262
host: str,
63-
username: str,
64-
password: str,
63+
client_id: str,
64+
client_secret: str,
6565
catalog: str,
6666
volume: str,
6767
volume_path: str,
6868
):
69-
client = WorkspaceClient(host=host, username=username, password=password)
69+
client = WorkspaceClient(host=host, client_id=client_id, client_secret=client_secret)
7070

7171
for file in client.files.list_directory_contents(
7272
_get_volume_path(catalog, volume, volume_path)

test_e2e/test-dest.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ all_tests=(
2222
'chroma.sh'
2323
'clarifai.sh'
2424
'couchbase.sh'
25+
'databricks-volumes.sh'
2526
'delta-table.sh'
2627
'dropbox.sh'
2728
'elasticsearch.sh'
@@ -68,7 +69,6 @@ tests_to_ignore=(
6869
'notion.sh'
6970
'dropbox.sh'
7071
'sharepoint.sh'
71-
'databricks-volumes.sh'
7272
)
7373

7474
for test in "${all_tests[@]}"; do

unstructured_ingest/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.0.21-dev1" # pragma: no cover
1+
__version__ = "0.0.21-dev2" # pragma: no cover

unstructured_ingest/v2/processes/connectors/databricks_volumes.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,10 @@ class DatabricksVolumesAccessConfig(AccessConfig):
4242
description="The Databricks password part of basic authentication. "
4343
"Only possible when Host is *.cloud.databricks.com (AWS).",
4444
)
45-
client_id: Optional[str] = Field(default=None)
46-
client_secret: Optional[str] = Field(default=None)
45+
client_id: Optional[str] = Field(default=None, description="Client ID of the OAuth app.")
46+
client_secret: Optional[str] = Field(
47+
default=None, description="Client Secret of the OAuth app."
48+
)
4749
token: Optional[str] = Field(
4850
default=None,
4951
description="The Databricks personal access token (PAT) (AWS, Azure, and GCP) or "
@@ -140,11 +142,12 @@ def precheck(self) -> None:
140142

141143
def run(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
142144
output_path = os.path.join(self.upload_config.path, path.name)
143-
self.get_client().files.upload(
144-
file_path=output_path,
145-
contents=path,
146-
overwrite=self.upload_config.overwrite,
147-
)
145+
with open(path, "rb") as elements_file:
146+
self.get_client().files.upload(
147+
file_path=output_path,
148+
contents=elements_file,
149+
overwrite=self.upload_config.overwrite,
150+
)
148151

149152

150153
databricks_volumes_destination_entry = DestinationRegistryEntry(

0 commit comments

Comments
 (0)