Skip to content

Commit c00783c

Browse files
feat: Update docs for Astra DB v2 connector updates (#258)
Co-authored-by: Paul Cornell <[email protected]>
1 parent 67700b3 commit c00783c

File tree

9 files changed

+52
-13
lines changed

9 files changed

+52
-13
lines changed

api-reference/ingest/source-connectors/astradb.mdx

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -15,12 +15,11 @@ import SharedAPIKeyURL from '/snippets/general-shared-text/api-key-url.mdx';
1515
Now call the Unstructured CLI or Python SDK. The destination connector can be any of the ones supported. This example uses the local destination connector:
1616

1717
import AstraDBAPISh from '/snippets/source_connectors/astradb.sh.mdx';
18+
import AstraDBAPIPyV2 from '/snippets/source_connectors/astradb.v2.py.mdx';
1819
import AstraDBAPIPyV1 from '/snippets/source_connectors/astradb.v1.py.mdx';
1920

2021
<CodeGroup>
21-
2222
<AstraDBAPISh />
23-
23+
<AstraDBAPIPyV2 />
2424
<AstraDBAPIPyV1 />
25-
2625
</CodeGroup>

open-source/ingest/source-connectors/astradb.mdx

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -15,14 +15,13 @@ Now call the Unstructured CLI or Python. The destination connector can be any of
1515
This example sends data to Unstructured API services for processing by default. To process data locally instead, see the instructions at the end of this page.
1616

1717
import AstraDBSh from '/snippets/source_connectors/astradb.sh.mdx';
18+
import AstraDBPyV2 from '/snippets/source_connectors/astradb.v2.py.mdx';
1819
import AstraDBPyV1 from '/snippets/source_connectors/astradb.v1.py.mdx';
1920

2021
<CodeGroup>
21-
2222
<AstraDBSh />
23-
23+
<AstraDBPyV2 />
2424
<AstraDBPyV1 />
25-
2625
</CodeGroup>
2726

2827
import SharedPartitionByAPIOSS from '/snippets/ingest-configuration-shared/partition-by-api-oss.mdx';

snippets/destination_connectors/astradb.sh.mdx

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ unstructured-ingest \
1717
astradb \
1818
--api-endpoint $ASTRA_DB_API_ENDPOINT \
1919
--token $ASTRA_DB_APPLICATION_TOKEN \
20-
--namespace $ASTRA_DB_NAMESPACE \
20+
--keyspace $ASTRA_DB_KEYSPACE \
2121
--collection-name $ASTRA_DB_COLLECTION \
2222
--embedding-dimension $ASTRA_DB_EMBEDDING_DIMENSIONS
2323
```

snippets/destination_connectors/astradb.v1.py.mdx

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@ def get_writer() -> Writer:
2828
api_endpoint=os.getenv("ASTRA_DB_API_ENDPOINT"),
2929
token=os.getenv("ASTRA_DB_APPLICATION_TOKEN"),
3030
),
31-
namespace=os.getenv("ASTRA_DB_NAMESPACE"),
31+
keyspace=os.getenv("ASTRA_DB_KEYSPACE"),
3232
collection_name=os.getenv("ASTRA_DB_COLLECTION"),
3333
embedding_dimension=os.getenv("ASTRA_DB_EMBEDDING_DIMENSIONS"),
3434
),

snippets/destination_connectors/astradb.v2.py.mdx

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,7 @@ if __name__ == "__main__":
4848
),
4949
stager_config=AstraDBUploadStagerConfig(),
5050
uploader_config=AstraDBUploaderConfig(
51-
namespace=os.getenv("ASTRA_DB_NAMESPACE"),
51+
keyspace=os.getenv("ASTRA_DB_KEYSPACE"),
5252
collection_name=os.getenv("ASTRA_DB_COLLECTION"),
5353
embedding_dimension=os.getenv("ASTRA_DB_EMBEDDING_DIMENSIONS")
5454
)

snippets/general-shared-text/astradb-cli-api.mdx

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,6 @@ These environment variables:
1212

1313
- `ASTRA_DB_API_ENDPOINT` - The API endpoint for the Astra DB database, represented by `--api-endpoint` (CLI) or `api_endpoint` (Python). To get the endpoint, see the **Database Details > API Endpoint** value on your database's **Overview** tab.
1414
- `ASTRA_DB_APPLICATION_TOKEN` - The database application token value for the database, represented by `--token` (CLI) or `token` (Python). To get the token, see the **Database Details > Application Tokens** box on your database's **Overview** tab.
15-
- `ASTRA_DB_NAMESPACE` - The name of the namespace for the database, represented by `--namespace` (CLI) or `namespace` (Python).
16-
- `ASTRA_DB_COLLECTION` - The name of the collection for the namespace, represented by `--collection-name` (CLI) or `collection_name` (Python).
15+
- `ASTRA_DB_KEYSPACE` - The name of the keyspace for the database, represented by `--keyspace` (CLI) or `keyspace` (Python).
16+
- `ASTRA_DB_COLLECTION` - The name of the collection for the keyspace, represented by `--collection-name` (CLI) or `collection_name` (Python).
1717
- `ASTRA_DB_EMBEDDING_DIMENSIONS` - The number of dimensions in the collection, represented by `--embedding-dimension` (CLI) or `embedding_dimension` (Python).

snippets/source_connectors/astradb.sh.mdx

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@ unstructured-ingest \
55
astradb \
66
--api-endpoint $ASTRA_DB_API_ENDPOINT \
77
--token $ASTRA_DB_APPLICATION_TOKEN \
8-
--namespace $ASTRA_DB_NAMESPACE \
8+
--keyspace $ASTRA_DB_KEYSPACE \
99
--collection-name $ASTRA_DB_COLLECTION \
1010
--download-dir $LOCAL_FILE_DOWNLOAD_DIR \
1111
--partition-by-api \

snippets/source_connectors/astradb.v1.py.mdx

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@ if __name__ == "__main__":
2929
token=os.getenv("ASTRA_DB_APPLICATION_TOKEN"),
3030
api_endpoint=os.getenv("ASTRA_DB_API_ENDPOINT")
3131
),
32-
namespace=os.getenv("ASTRA_DB_NAMESPACE"),
32+
keyspace=os.getenv("ASTRA_DB_KEYSPACE"),
3333
collection_name=os.getenv("ASTRA_DB_COLLECTION")
3434
)
3535
).run()
snippets/source_connectors/astradb.v2.py.mdx

Lines changed: 41 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,41 @@
1+
```python Python Ingest v2
2+
import os
3+
4+
from unstructured_ingest.v2.pipeline.pipeline import Pipeline
5+
from unstructured_ingest.v2.interfaces import ProcessorConfig
6+
from unstructured_ingest.v2.processes.connectors.astradb import (
7+
AstraDBAccessConfig,
8+
AstraDBConnectionConfig,
9+
AstraDBDownloaderConfig,
10+
AstraDBIndexerConfig,
11+
)
12+
from unstructured_ingest.v2.processes.partitioner import PartitionerConfig
13+
from unstructured_ingest.v2.processes.connectors.local import LocalUploaderConfig
14+
15+
# Chunking and embedding are optional.
16+
17+
if __name__ == "__main__":
18+
Pipeline.from_configs(
19+
context=ProcessorConfig(),
20+
indexer_config=AstraDBIndexerConfig(
21+
collection_name=os.getenv("ASTRA_DB_COLLECTION"),
22+
keyspace=os.getenv("ASTRA_DB_KEYSPACE"),
23+
),
24+
downloader_config=AstraDBDownloaderConfig(
25+
collection_name=os.getenv("ASTRA_DB_COLLECTION"),
26+
keyspace=os.getenv("ASTRA_DB_KEYSPACE"),
27+
),
28+
source_connection_config=AstraDBConnectionConfig(
29+
access_config=AstraDBAccessConfig(
30+
token=os.getenv("ASTRA_DB_APPLICATION_TOKEN"),
31+
api_endpoint=os.getenv("ASTRA_DB_API_ENDPOINT"),
32+
),
33+
),
34+
partitioner_config=PartitionerConfig(
35+
partition_by_api=True,
36+
partition_endpoint=os.getenv("UNSTRUCTURED_API_URL"),
37+
api_key=os.getenv("UNSTRUCTURED_API_KEY"),
38+
),
39+
uploader_config=LocalUploaderConfig(output_dir=os.getenv("LOCAL_FILE_OUTPUT_DIR")),
40+
).run()
41+
```

0 commit comments

Comments
 (0)