Skip to content

Commit 444f7d5

Browse files
authored
bugfix/add missing lancedb extra (#264)
* add missing extra
* fix text
* Add cloud connector for lancedb
* fix int test
1 parent cb01092 commit 444f7d5

File tree

7 files changed

+81
-22
lines changed

7 files changed

+81
-22
lines changed

CHANGELOG.md

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
1-
## 0.3.2-dev4
1+
## 0.3.3-dev1
22

33
### Enhancements
4+
45
* **Add `precheck` to Milvus connector**
56

67
### Fixes
7-
* **Make AstraDB uploader truncate `text` and `text_as_html` content to max 8000 bytes**
8+
9+
* **Make AstraDB uploader truncate `text` and `text_as_html` content to max 8000 bytes**
10+
* **Add missing LanceDb extra**
811

912
## 0.3.2
1013

setup.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""
22
setup.py
33
4-
unstructured - pre-processing tools for unstructured data
4+
unstructured-ingest - pre-processing tools for unstructured data
55
66
Copyright 2022 Unstructured Technologies, Inc.
77
@@ -104,6 +104,7 @@ def load_requirements(file: Union[str, Path]) -> List[str]:
104104
"jira": load_requirements("requirements/connectors/jira.in"),
105105
"kafka": load_requirements("requirements/connectors/kafka.in"),
106106
"kdbai": load_requirements("requirements/connectors/kdbai.in"),
107+
"lancedb": load_requirements("requirements/connectors/lancedb.in"),
107108
"milvus": load_requirements("requirements/connectors/milvus.in"),
108109
"mongodb": load_requirements("requirements/connectors/mongodb.in"),
109110
"notion": load_requirements("requirements/connectors/notion.in"),

test/integration/connectors/test_lancedb.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@
1414
from test.integration.connectors.utils.constants import DESTINATION_TAG
1515
from unstructured_ingest.v2.interfaces.file_data import FileData, SourceIdentifiers
1616
from unstructured_ingest.v2.processes.connectors.lancedb.aws import (
17-
LanceDBS3AccessConfig,
18-
LanceDBS3ConnectionConfig,
19-
LanceDBS3Uploader,
17+
LanceDBAwsAccessConfig,
18+
LanceDBAwsConnectionConfig,
19+
LanceDBAwsUploader,
2020
)
2121
from unstructured_ingest.v2.processes.connectors.lancedb.azure import (
2222
LanceDBAzureAccessConfig,
@@ -156,7 +156,7 @@ def _get_uri(target: Literal["local", "s3", "gcs", "az"], local_base_path: Path)
156156

157157
def _get_uploader(
158158
uri: str,
159-
) -> Union[LanceDBAzureUploader, LanceDBAzureUploader, LanceDBS3Uploader, LanceDBGSPUploader]:
159+
) -> Union[LanceDBAzureUploader, LanceDBAzureUploader, LanceDBAwsUploader, LanceDBGSPUploader]:
160160
target = uri.split("://", maxsplit=1)[0] if uri.startswith(("s3", "az", "gs")) else "local"
161161
if target == "az":
162162
azure_connection_string = os.getenv("AZURE_DEST_CONNECTION_STR")
@@ -170,10 +170,10 @@ def _get_uploader(
170170
)
171171

172172
elif target == "s3":
173-
return LanceDBS3Uploader(
173+
return LanceDBAwsUploader(
174174
upload_config=LanceDBUploaderConfig(table_name=TABLE_NAME),
175-
connection_config=LanceDBS3ConnectionConfig(
176-
access_config=LanceDBS3AccessConfig(
175+
connection_config=LanceDBAwsConnectionConfig(
176+
access_config=LanceDBAwsAccessConfig(
177177
aws_access_key_id=os.getenv("S3_INGEST_TEST_ACCESS_KEY"),
178178
aws_secret_access_key=os.getenv("S3_INGEST_TEST_SECRET_KEY"),
179179
),

unstructured_ingest/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.3.2-dev4" # pragma: no cover
1+
__version__ = "0.3.3-dev1" # pragma: no cover

unstructured_ingest/v2/processes/connectors/lancedb/__init__.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,25 @@
66
from .aws import lancedb_aws_destination_entry
77
from .azure import CONNECTOR_TYPE as LANCEDB_AZURE_CONNECTOR_TYPE
88
from .azure import lancedb_azure_destination_entry
9+
from .cloud import CONNECTOR_TYPE as LANCEDB_CLOUD_CONNECTOR_TYPE
10+
from .cloud import lancedb_cloud_destination_entry
911
from .gcp import CONNECTOR_TYPE as LANCEDB_GCS_CONNECTOR_TYPE
1012
from .gcp import lancedb_gcp_destination_entry
1113
from .local import CONNECTOR_TYPE as LANCEDB_LOCAL_CONNECTOR_TYPE
1214
from .local import lancedb_local_destination_entry
1315

14-
add_destination_entry(LANCEDB_S3_CONNECTOR_TYPE, lancedb_aws_destination_entry)
15-
add_destination_entry(LANCEDB_AZURE_CONNECTOR_TYPE, lancedb_azure_destination_entry)
16-
add_destination_entry(LANCEDB_GCS_CONNECTOR_TYPE, lancedb_gcp_destination_entry)
17-
add_destination_entry(LANCEDB_LOCAL_CONNECTOR_TYPE, lancedb_local_destination_entry)
16+
add_destination_entry(
17+
destination_type=LANCEDB_S3_CONNECTOR_TYPE, entry=lancedb_aws_destination_entry
18+
)
19+
add_destination_entry(
20+
destination_type=LANCEDB_AZURE_CONNECTOR_TYPE, entry=lancedb_azure_destination_entry
21+
)
22+
add_destination_entry(
23+
destination_type=LANCEDB_GCS_CONNECTOR_TYPE, entry=lancedb_gcp_destination_entry
24+
)
25+
add_destination_entry(
26+
destination_type=LANCEDB_LOCAL_CONNECTOR_TYPE, entry=lancedb_local_destination_entry
27+
)
28+
add_destination_entry(
29+
destination_type=LANCEDB_CLOUD_CONNECTOR_TYPE, entry=lancedb_cloud_destination_entry
30+
)

unstructured_ingest/v2/processes/connectors/lancedb/aws.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,28 +15,28 @@
1515
CONNECTOR_TYPE = "lancedb_aws"
1616

1717

18-
class LanceDBS3AccessConfig(AccessConfig):
18+
class LanceDBAwsAccessConfig(AccessConfig):
1919
aws_access_key_id: str = Field(description="The AWS access key ID to use.")
2020
aws_secret_access_key: str = Field(description="The AWS secret access key to use.")
2121

2222

23-
class LanceDBS3ConnectionConfig(LanceDBRemoteConnectionConfig):
24-
access_config: Secret[LanceDBS3AccessConfig]
23+
class LanceDBAwsConnectionConfig(LanceDBRemoteConnectionConfig):
24+
access_config: Secret[LanceDBAwsAccessConfig]
2525

2626
def get_storage_options(self) -> dict:
2727
return {**self.access_config.get_secret_value().model_dump(), "timeout": self.timeout}
2828

2929

3030
@dataclass
31-
class LanceDBS3Uploader(LanceDBUploader):
31+
class LanceDBAwsUploader(LanceDBUploader):
3232
upload_config: LanceDBUploaderConfig
33-
connection_config: LanceDBS3ConnectionConfig
33+
connection_config: LanceDBAwsConnectionConfig
3434
connector_type: str = CONNECTOR_TYPE
3535

3636

3737
lancedb_aws_destination_entry = DestinationRegistryEntry(
38-
connection_config=LanceDBS3ConnectionConfig,
39-
uploader=LanceDBS3Uploader,
38+
connection_config=LanceDBAwsConnectionConfig,
39+
uploader=LanceDBAwsUploader,
4040
uploader_config=LanceDBUploaderConfig,
4141
upload_stager_config=LanceDBUploadStagerConfig,
4242
upload_stager=LanceDBUploadStager,
unstructured_ingest/v2/processes/connectors/lancedb/cloud.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
from dataclasses import dataclass
2+
3+
from pydantic import Field, Secret
4+
5+
from unstructured_ingest.v2.interfaces.connector import AccessConfig
6+
from unstructured_ingest.v2.processes.connector_registry import DestinationRegistryEntry
7+
from unstructured_ingest.v2.processes.connectors.lancedb.lancedb import (
8+
LanceDBRemoteConnectionConfig,
9+
LanceDBUploader,
10+
LanceDBUploaderConfig,
11+
LanceDBUploadStager,
12+
LanceDBUploadStagerConfig,
13+
)
14+
15+
CONNECTOR_TYPE = "lancedb_cloud"
16+
17+
18+
class LanceDBCloudAccessConfig(AccessConfig):
19+
api_key: str = Field(description="Api key associated with LanceDb cloud")
20+
21+
22+
class LanceDBCloudConnectionConfig(LanceDBRemoteConnectionConfig):
23+
access_config: Secret[LanceDBCloudAccessConfig]
24+
25+
def get_storage_options(self) -> dict:
26+
return {**self.access_config.get_secret_value().model_dump(), "timeout": self.timeout}
27+
28+
29+
@dataclass
30+
class LanceDBCloudUploader(LanceDBUploader):
31+
upload_config: LanceDBUploaderConfig
32+
connection_config: LanceDBCloudConnectionConfig
33+
connector_type: str = CONNECTOR_TYPE
34+
35+
36+
lancedb_cloud_destination_entry = DestinationRegistryEntry(
37+
connection_config=LanceDBCloudConnectionConfig,
38+
uploader=LanceDBCloudUploader,
39+
uploader_config=LanceDBUploaderConfig,
40+
upload_stager_config=LanceDBUploadStagerConfig,
41+
upload_stager=LanceDBUploadStager,
42+
)

0 commit comments

Comments
 (0)