Skip to content

Commit 5436ceb

Browse files
authored
chore: add destination name format for weaviate (#390)
* add name formatting function; update test * tidy and version
1 parent 0fa4fee commit 5436ceb

File tree

4 files changed

+21
-4
lines changed

4 files changed

+21
-4
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
## 0.5.6-dev0
2+
3+
### Enhancements
4+
5+
* Add name formatting to Weaviate destination uploader
6+
17
## 0.5.5
28

39
### Enhancements

test/integration/connectors/weaviate/test_local.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,6 @@ def run_uploader_and_validate(
7878
validate_count(expected_count=expected_count)
7979

8080

81-
@pytest.mark.asyncio
8281
@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, VECTOR_DB_TAG)
8382
def test_weaviate_local_destination(upload_file: Path, collection: str, tmp_path: Path):
8483
file_data = FileData(
@@ -142,11 +141,12 @@ def test_weaviate_local_create_destination(weaviate_instance):
142141
upload_config=LocalWeaviateUploaderConfig(),
143142
connection_config=LocalWeaviateConnectionConfig(),
144143
)
145-
collection_name = "system_created"
144+
collection_name = "system_created-123"
145+
formatted_collection_name = "System_created_123"
146146
created = uploader.create_destination(destination_name=collection_name)
147147
assert created
148148
with uploader.connection_config.get_client() as weaviate_client:
149-
assert weaviate_client.collections.exists(name=collection_name)
149+
assert weaviate_client.collections.exists(name=formatted_collection_name)
150150

151151
created = uploader.create_destination(destination_name=collection_name)
152152
assert not created

unstructured_ingest/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.5.5" # pragma: no cover
1+
__version__ = "0.5.6-dev0" # pragma: no cover

unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import json
2+
import re
23
from abc import ABC, abstractmethod
34
from contextlib import contextmanager
45
from dataclasses import dataclass, field
@@ -232,16 +233,26 @@ def precheck(self) -> None:
232233
def init(self, *kwargs: Any) -> None:
233234
self.create_destination()
234235

236+
def format_destination_name(self, destination_name: str) -> str:
    """Return *destination_name* rewritten as a Weaviate collection name.

    Weaviate naming requirements handled here:
      * only alphanumeric characters and underscores are allowed, so every
        other character is replaced with an underscore;
      * the name must begin with a capital letter, so the first character
        is upper-cased.

    The case of all remaining characters is preserved, e.g.
    ``"myCollection-1"`` becomes ``"MyCollection_1"``.

    Args:
        destination_name: The requested destination (collection) name.

    Returns:
        The formatted name. An empty input yields an empty string.
    """
    formatted = re.sub(r"[^a-zA-Z0-9]", "_", destination_name)
    # Deliberately not str.capitalize(): that would also lower-case the rest
    # of the name and mangle mixed-case names ("MyCollection" -> "Mycollection").
    # Slicing keeps this safe for an empty string.
    # NOTE(review): a name whose first character is a digit or underscore is
    # returned without a leading capital letter — confirm whether callers
    # should reject such names.
    return formatted[:1].upper() + formatted[1:]
242+
235243
def create_destination(
236244
self, destination_name: str = "elements", vector_length: Optional[int] = None, **kwargs: Any
237245
) -> bool:
246+
destination_name = self.format_destination_name(destination_name)
238247
collection_name = self.upload_config.collection or destination_name
239248
self.upload_config.collection = collection_name
249+
240250
connectors_dir = Path(__file__).parents[1]
241251
collection_config_file = connectors_dir / "assets" / "weaviate_collection_config.json"
242252
with collection_config_file.open() as f:
243253
collection_config = json.load(f)
244254
collection_config["class"] = collection_name
255+
245256
if not self._collection_exists():
246257
logger.info(
247258
f"creating default weaviate collection '{collection_name}' with default configs"

0 commit comments

Comments
 (0)