Skip to content

Commit e673fd6

Browse files
authored
API: OneDrive v2 destination connector (#338)
1 parent c0366eb commit e673fd6

File tree

6 files changed

+142
-0
lines changed

6 files changed

+142
-0
lines changed
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
---
2+
title: OneDrive
3+
---
4+
5+
import NewDocument from '/snippets/general-shared-text/new-document.mdx';
6+
7+
<NewDocument />
8+
9+
import SharedContentOneDrive from '/snippets/dc-shared-text/onedrive-cli-api.mdx';
10+
import SharedAPIKeyURL from '/snippets/general-shared-text/api-key-url.mdx';
11+
12+
<SharedContentOneDrive/>
13+
<SharedAPIKeyURL/>
14+
15+
Now call the Unstructured CLI or Python SDK. The source connector can be any of the ones supported. This example uses the local source connector:
16+
17+
import OneDriveAPISh from '/snippets/destination_connectors/onedrive.sh.mdx';
18+
import OneDriveAPIPyV2 from '/snippets/destination_connectors/onedrive.v2.py.mdx';
19+
20+
<CodeGroup>
21+
<OneDriveAPISh />
22+
<OneDriveAPIPyV2 />
23+
</CodeGroup>
24+

mint.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,7 @@
202202
"open-source/ingest/destination-connectors/local",
203203
"open-source/ingest/destination-connectors/milvus",
204204
"open-source/ingest/destination-connectors/mongodb",
205+
"open-source/ingest/destination-connectors/onedrive",
205206
"open-source/ingest/destination-connectors/opensearch",
206207
"open-source/ingest/destination-connectors/pinecone",
207208
"open-source/ingest/destination-connectors/postgresql",
@@ -359,6 +360,7 @@
359360
"api-reference/ingest/destination-connector/local",
360361
"api-reference/ingest/destination-connector/milvus",
361362
"api-reference/ingest/destination-connector/mongodb",
363+
"api-reference/ingest/destination-connector/onedrive",
362364
"api-reference/ingest/destination-connector/opensearch",
363365
"api-reference/ingest/destination-connector/pinecone",
364366
"api-reference/ingest/destination-connector/postgresql",
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
---
2+
title: OneDrive
3+
---
4+
5+
import NewDocument from '/snippets/general-shared-text/new-document.mdx';
6+
7+
<NewDocument />
8+
9+
import SharedOneDrive from '/snippets/dc-shared-text/onedrive-cli-api.mdx';
10+
11+
<SharedOneDrive />
12+
13+
Now call the Unstructured CLI or the Python library. The source connector can be any of the ones supported. This example uses the local source connector.
14+
15+
This example sends files to Unstructured API services for processing by default. To process files locally instead, see the instructions at the end of this page.
16+
17+
import OneDriveAPISh from '/snippets/destination_connectors/onedrive.sh.mdx';
18+
import OneDriveAPIPyV2 from '/snippets/destination_connectors/onedrive.v2.py.mdx';
19+
20+
<CodeGroup>
21+
<OneDriveAPISh />
22+
<OneDriveAPIPyV2 />
23+
</CodeGroup>
24+
25+
import SharedPartitionByAPIOSS from '/snippets/ingest-configuration-shared/partition-by-api-oss.mdx';
26+
27+
<SharedPartitionByAPIOSS/>
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
Batch process all your records to store structured outputs in a OneDrive account.
2+
3+
You will need:
4+
5+
import SharedOneDrive from '/snippets/general-shared-text/onedrive.mdx';
6+
import SharedOneDriveCLIAPI from '/snippets/general-shared-text/onedrive-cli-api.mdx';
7+
8+
<SharedOneDrive />
9+
<SharedOneDriveCLIAPI />
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
```bash CLI
2+
#!/usr/bin/env bash
3+
4+
# Chunking and embedding are optional.
5+
6+
unstructured-ingest \
7+
local \
8+
--input-path $LOCAL_FILE_INPUT_DIR \
9+
--chunking-strategy by_title \
10+
--embedding-provider huggingface \
11+
--partition-by-api \
12+
--api-key $UNSTRUCTURED_API_KEY \
13+
--partition-endpoint $UNSTRUCTURED_API_URL \
14+
--strategy hi_res \
15+
--additional-partition-args="{\"split_pdf_page\":\"true\", \"split_pdf_allow_failed\":\"true\", \"split_pdf_concurrency_level\": 15}" \
16+
onedrive \
17+
--client-cred $ONEDRIVE_CLIENT_CRED \
18+
--client-id $ONEDRIVE_CLIENT_ID \
19+
--user-pname $ONEDRIVE_USER_PNAME \
20+
--tenant $ONEDRIVE_TENANT \
21+
--authority-url $ONEDRIVE_AUTHORITY_URL \
22+
--remote-url $ONEDRIVE_PATH \
23+
--prefix "onedrive://"
24+
```
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
```python Python Ingest v2
2+
import os
3+
4+
from unstructured_ingest.v2.pipeline.pipeline import Pipeline
5+
from unstructured_ingest.v2.interfaces import ProcessorConfig
6+
7+
from unstructured_ingest.v2.processes.connectors.local import (
8+
LocalIndexerConfig,
9+
LocalDownloaderConfig,
10+
LocalConnectionConfig
11+
)
12+
13+
from unstructured_ingest.v2.processes.partitioner import PartitionerConfig
14+
from unstructured_ingest.v2.processes.chunker import ChunkerConfig
15+
from unstructured_ingest.v2.processes.embedder import EmbedderConfig
16+
17+
from unstructured_ingest.v2.processes.connectors.onedrive import (
18+
OnedriveConnectionConfig,
19+
OnedriveAccessConfig,
20+
OnedriveUploaderConfig
21+
)
22+
23+
# Chunking and embedding are optional.
24+
25+
if __name__ == "__main__":
26+
Pipeline.from_configs(
27+
context=ProcessorConfig(),
28+
indexer_config=LocalIndexerConfig(input_path=os.getenv("LOCAL_FILE_INPUT_DIR")),
29+
downloader_config=LocalDownloaderConfig(),
30+
source_connection_config=LocalConnectionConfig(),
31+
partitioner_config=PartitionerConfig(
32+
partition_by_api=True,
33+
api_key=os.getenv("UNSTRUCTURED_API_KEY"),
34+
partition_endpoint=os.getenv("UNSTRUCTURED_API_URL"),
35+
strategy="hi_res",
36+
additional_partition_args={
37+
"split_pdf_page": True,
38+
"split_pdf_allow_failed": True,
39+
"split_pdf_concurrency_level": 15
40+
}
41+
),
42+
chunker_config=ChunkerConfig(chunking_strategy="by_title"),
43+
embedder_config=EmbedderConfig(embedding_provider="huggingface"),
44+
destination_connection_config=OnedriveConnectionConfig(
45+
access_config=OnedriveAccessConfig(client_cred=os.getenv("ONEDRIVE_CLIENT_CRED")),
46+
client_id=os.getenv("ONEDRIVE_CLIENT_ID"),
47+
user_pname=os.getenv("ONEDRIVE_USER_PNAME"),
48+
tenant=os.getenv("ONEDRIVE_TENANT"),
49+
authority_url=os.getenv("ONEDRIVE_AUTHORITY_URL")
50+
),
51+
uploader_config=OnedriveUploaderConfig(
52+
remote_url=os.getenv("ONEDRIVE_PATH"),
53+
prefix="onedrive://"
54+
)
55+
).run()
56+
```

0 commit comments

Comments
 (0)