From 9de0dd4da91afb58ebede0c7306a204fe8005096 Mon Sep 17 00:00:00 2001 From: Mateusz Kuprowski Date: Fri, 3 Jan 2025 14:04:21 +0100 Subject: [PATCH 1/9] Added possibility of utilising namespaces --- .../v2/processes/connectors/pinecone.py | 78 +++++++++++-------- 1 file changed, 45 insertions(+), 33 deletions(-) diff --git a/unstructured_ingest/v2/processes/connectors/pinecone.py b/unstructured_ingest/v2/processes/connectors/pinecone.py index fe62f97ed..08309e50c 100644 --- a/unstructured_ingest/v2/processes/connectors/pinecone.py +++ b/unstructured_ingest/v2/processes/connectors/pinecone.py @@ -105,7 +105,10 @@ class PineconeUploaderConfig(UploaderConfig): ) namespace: Optional[str] = Field( default=None, - description="The namespace to write to. If not specified, the default namespace is used", + description=( + "The namespace to write to. If not specified (None), the Pinecone SDK " + "will fall back to the 'default' namespace automatically." + ), ) record_id_key: str = Field( default=RECORD_ID_LABEL, @@ -173,49 +176,56 @@ def precheck(self): raise DestinationConnectionError(f"failed to validate connection: {e}") def pod_delete_by_record_id(self, file_data: FileData) -> None: + """Deletion for Pinecone Pod-based index.""" logger.debug( - f"deleting any content with metadata " + f"Deleting any content with metadata " f"{self.upload_config.record_id_key}={file_data.identifier} " - f"from pinecone pod index" + f"from Pinecone pod index" ) index = self.connection_config.get_index(pool_threads=MAX_POOL_THREADS) + + # Build the delete_kwargs, only include 'namespace' if it's not None delete_kwargs = { - "filter": {self.upload_config.record_id_key: {"$eq": file_data.identifier}} + "filter": {self.upload_config.record_id_key: {"$eq": file_data.identifier}}, } - if namespace := self.upload_config.namespace: - delete_kwargs["namespace"] = namespace + if self.upload_config.namespace is not None: + delete_kwargs["namespace"] = self.upload_config.namespace resp = index.delete(**delete_kwargs) logger.debug( - f"deleted any content with metadata " + f"Deleted any content with metadata " f"{self.upload_config.record_id_key}={file_data.identifier} " - f"from pinecone index: {resp}" + f"from Pinecone index: {resp}" ) def serverless_delete_by_record_id(self, file_data: FileData) -> None: + """Deletion for Pinecone Serverless index.""" logger.debug( - f"deleting any content with metadata " + f"Deleting any content with metadata " f"{self.upload_config.record_id_key}={file_data.identifier} " - f"from pinecone serverless index" + f"from Pinecone serverless index" ) index = self.connection_config.get_index(pool_threads=MAX_POOL_THREADS) + + # Build the list_kwargs, only include 'namespace' if it's not None list_kwargs = {"prefix": f"{file_data.identifier}#"} + if self.upload_config.namespace is not None: + list_kwargs["namespace"] = self.upload_config.namespace + deleted_ids = 0 - if namespace := self.upload_config.namespace: - list_kwargs["namespace"] = namespace for ids in index.list(**list_kwargs): deleted_ids += len(ids) delete_kwargs = {"ids": ids} - if namespace := self.upload_config.namespace: - delete_resp = delete_kwargs["namespace"] = namespace - # delete_resp should be an empty dict if there were no errors - if delete_resp: - logger.error(f"failed to delete batch of ids: {delete_resp}") - index.delete(**delete_kwargs) + if self.upload_config.namespace is not None: + delete_kwargs["namespace"] = self.upload_config.namespace + delete_resp = index.delete(**delete_kwargs) + if delete_resp: + logger.error(f"Failed to delete batch of IDs: {delete_resp}") + logger.info( - f"deleted {deleted_ids} records with metadata " + f"Deleted {deleted_ids} records with metadata " f"{self.upload_config.record_id_key}={file_data.identifier} " - f"from pinecone index" + f"from Pinecone index" ) @requires_dependencies(["pinecone"], extras="pinecone") @@ -229,26 +239,28 @@ def upsert_batches_async(self, elements_dict: list[dict]): max_batch_size=self.upload_config.batch_size, ) ) - logger.info(f"split doc with {len(elements_dict)} elements into {len(chunks)} batches") + logger.info(f"Split doc with {len(elements_dict)} elements into {len(chunks)} batches") max_pool_threads = min(len(chunks), MAX_POOL_THREADS) - if self.upload_config.pool_threads: - pool_threads = min(self.upload_config.pool_threads, max_pool_threads) - else: - pool_threads = max_pool_threads + pool_threads = min(self.upload_config.pool_threads or max_pool_threads, max_pool_threads) index = self.connection_config.get_index(pool_threads=pool_threads) + + # Build upsert_kwargs for each chunk + upsert_kwargs_list = [] + for chunk in chunks: + kwargs = {"vectors": chunk, "async_req": True} + if self.upload_config.namespace is not None: + kwargs["namespace"] = self.upload_config.namespace + upsert_kwargs_list.append(kwargs) + with index: - upsert_kwargs = [{"vectors": chunk, "async_req": True} for chunk in chunks] - if namespace := self.upload_config.namespace: - for kwargs in upsert_kwargs: - kwargs["namespace"] = namespace - async_results = [index.upsert(**kwarg) for kwarg in upsert_kwargs] - # Wait for and retrieve responses (this raises in case of error) + # Execute async upserts + async_results = [index.upsert(**kwargs) for kwargs in upsert_kwargs_list] try: results = [async_result.get() for async_result in async_results] except PineconeApiException as api_error: - raise DestinationConnectionError(f"http error: {api_error}") from api_error - logger.debug(f"results: {results}") + raise DestinationConnectionError(f"HTTP error: {api_error}") from api_error + logger.debug(f"Results: {results}") def run_data(self, data: list[dict], file_data: FileData, **kwargs: Any) -> None: logger.info( From d5a3cd495bef951116f1185942c2d7f0b1825495 Mon Sep 17 00:00:00 2001 From: Mateusz Kuprowski Date: Fri, 3 Jan 2025 15:23:52 +0100 Subject: [PATCH 2/9] Added pinecone namespace integration tests Bumped version --- CHANGELOG.md | 6 + test/integration/connectors/test_pinecone.py | 116 +++++++++++++++++++ unstructured_ingest/__version__.py | 2 +- 3 files changed, 123 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index caa9d87f2..75303eeec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ ## 0.3.12-dev3 +### Fixes + +* **Added possibility to use namespaces in pinecone connector** + +## 0.3.12-dev3 + ### Enhancements * **Migrate Vectara Destination Connector to v2** diff --git a/test/integration/connectors/test_pinecone.py b/test/integration/connectors/test_pinecone.py index 3c393bcfd..b6e58d0c2 100644 --- a/test/integration/connectors/test_pinecone.py +++ b/test/integration/connectors/test_pinecone.py @@ -286,3 +286,119 @@ def test_pinecone_stager( stager=stager, tmp_dir=tmp_path, ) + + +@requires_env(API_KEY) +@pytest.mark.asyncio +@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG) +async def test_pinecone_namespace_write_failure( + pinecone_index: str, upload_file: Path, temp_dir: Path +): + """ + Test to ensure that using a non-existent or invalid namespace parameter + fails as expected. + """ + namespace_to_fail = "invalid_namespace_test" + file_data = FileData( + source_identifiers=SourceIdentifiers(fullpath=upload_file.name, filename=upload_file.name), + connector_type=CONNECTOR_TYPE, + identifier="pinecone_mock_id", + ) + + connection_config = PineconeConnectionConfig( + index_name=pinecone_index, + access_config=PineconeAccessConfig(api_key=get_api_key()), + ) + + stager_config = PineconeUploadStagerConfig() + stager = PineconeUploadStager(upload_stager_config=stager_config) + + new_upload_file = stager.run( + elements_filepath=upload_file, + output_dir=temp_dir, + output_filename=upload_file.name, + file_data=file_data, + ) + + # No need to create the namespace, as we expect this to fail + upload_config = PineconeUploaderConfig(namespace=namespace_to_fail) + uploader = PineconeUploader(connection_config=connection_config, upload_config=upload_config) + + # Precheck should pass overall, but the actual run might fail with the invalid namespace + uploader.precheck() + + try: + uploader.run(path=new_upload_file, file_data=file_data) + pytest.fail("Expected a failure when writing to a non-existent/invalid namespace.") + except DestinationConnectionError as e: + logger.info(f"Namespace write failure test passed: {e}") + + +@requires_env(API_KEY) +@pytest.mark.asyncio +@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG) +async def test_pinecone_namespace_write_success( + pinecone_index: str, upload_file: Path, temp_dir: Path +): + """ + Test to ensure data is written to a custom namespace successfully and + that everything is properly cleaned up afterward. + """ + test_namespace = "test_namespace_success" + + # Prepare test data + file_data = FileData( + source_identifiers=SourceIdentifiers(fullpath=upload_file.name, filename=upload_file.name), + connector_type=CONNECTOR_TYPE, + identifier="pinecone_mock_id", + ) + + connection_config = PineconeConnectionConfig( + index_name=pinecone_index, + access_config=PineconeAccessConfig(api_key=get_api_key()), + ) + stager_config = PineconeUploadStagerConfig() + stager = PineconeUploadStager(upload_stager_config=stager_config) + + new_upload_file = stager.run( + elements_filepath=upload_file, + output_dir=temp_dir, + output_filename=upload_file.name, + file_data=file_data, + ) + + upload_config = PineconeUploaderConfig(namespace=test_namespace) + uploader = PineconeUploader(connection_config=connection_config, upload_config=upload_config) + uploader.precheck() + + uploader.run(path=new_upload_file, file_data=file_data) + + # Validate the vectors in our test namespace + pinecone_client = Pinecone(api_key=get_api_key()) + index = pinecone_client.Index(name=pinecone_index) + + index_stats = index.describe_index_stats(namespace=test_namespace) + total_vectors_in_namespace = index_stats["total_vector_count"] + + with new_upload_file.open() as f: + staged_content = json.load(f) + expected_num_of_vectors = len(staged_content) + + assert total_vectors_in_namespace == expected_num_of_vectors, ( + f"Expected {expected_num_of_vectors} vectors in namespace '{test_namespace}', " + f"but found {total_vectors_in_namespace}." + ) + logger.info( + f"Successfully wrote {total_vectors_in_namespace} vectors to namespace '{test_namespace}'." + ) + + # --- CLEANUP --- + try: + # Remove all vectors in our test namespace. + # This effectively cleans up the namespace, even though you can't + # literally delete a namespace from Pinecone. + delete_resp = index.delete(filter={}, namespace=test_namespace) + logger.info(f"Cleaned up all vectors from namespace '{test_namespace}': {delete_resp}") + except Exception as e: + logger.error(f"Error cleaning up namespace '{test_namespace}': {e}") + pytest.fail(f"Test failed to clean up namespace '{test_namespace}'.") diff --git a/unstructured_ingest/__version__.py b/unstructured_ingest/__version__.py index 3ad6f8bfe..5d08f71ba 100644 --- a/unstructured_ingest/__version__.py +++ b/unstructured_ingest/__version__.py @@ -1 +1 @@ -__version__ = "0.3.12-dev3" # pragma: no cover +__version__ = "0.3.12-dev4" # pragma: no cover From 9b33ca2b954e11fa85f4f88966bebe574a5a2990 Mon Sep 17 00:00:00 2001 From: Mateusz Kuprowski Date: Fri, 3 Jan 2025 15:56:30 +0100 Subject: [PATCH 3/9] Updated changelog Removed test that would not work --- CHANGELOG.md | 2 +- test/integration/connectors/test_pinecone.py | 75 ++++++-------------- 2 files changed, 21 insertions(+), 56 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 75303eeec..eeae4c28c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -## 0.3.12-dev3 +## 0.3.12-dev4 ### Fixes diff --git a/test/integration/connectors/test_pinecone.py b/test/integration/connectors/test_pinecone.py index b6e58d0c2..8ca838b00 100644 --- a/test/integration/connectors/test_pinecone.py +++ b/test/integration/connectors/test_pinecone.py @@ -4,7 +4,7 @@ import re import time from pathlib import Path -from typing import Generator +from typing import Generator, Optional from uuid import uuid4 import pytest @@ -109,14 +109,25 @@ def pinecone_index() -> Generator[str, None, None]: def validate_pinecone_index( - index_name: str, expected_num_of_vectors: int, retries=30, interval=1 + index_name: str, + expected_num_of_vectors: int, + namespace: Optional[str] = None, + retries=30, + interval=1, ) -> None: - # Because there's a delay for the index to catch up to the recent writes, add in a retry - pinecone = Pinecone(api_key=get_api_key()) - index = pinecone.Index(name=index_name) + """ + Validates that `expected_num_of_vectors` are present in a Pinecone index, + optionally in a specific namespace. + """ + pinecone_client = Pinecone(api_key=get_api_key()) + index = pinecone_client.Index(name=index_name) + vector_count = -1 for i in range(retries): - index_stats = index.describe_index_stats() + if namespace: + index_stats = index.describe_index_stats(namespace=namespace) + else: + index_stats = index.describe_index_stats() # all namespaces vector_count = index_stats["total_vector_count"] if vector_count == expected_num_of_vectors: logger.info(f"expected {expected_num_of_vectors} == vector count {vector_count}") @@ -125,9 +136,10 @@ def validate_pinecone_index( f"retry attempt {i}: expected {expected_num_of_vectors} != vector count {vector_count}" ) time.sleep(interval) + assert vector_count == expected_num_of_vectors, ( - f"vector count from index ({vector_count}) doesn't " - f"match expected number: {expected_num_of_vectors}" + f"vector count from index (namespace={namespace}) is {vector_count}, " + f"expected {expected_num_of_vectors}" ) @@ -287,53 +299,6 @@ def test_pinecone_stager( tmp_dir=tmp_path, ) - -@requires_env(API_KEY) -@pytest.mark.asyncio -@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG) -async def test_pinecone_namespace_write_failure( - pinecone_index: str, upload_file: Path, temp_dir: Path -): - """ - Test to ensure that using a non-existent or invalid namespace parameter - fails as expected. - """ - namespace_to_fail = "invalid_namespace_test" - file_data = FileData( - source_identifiers=SourceIdentifiers(fullpath=upload_file.name, filename=upload_file.name), - connector_type=CONNECTOR_TYPE, - identifier="pinecone_mock_id", - ) - - connection_config = PineconeConnectionConfig( - index_name=pinecone_index, - access_config=PineconeAccessConfig(api_key=get_api_key()), - ) - - stager_config = PineconeUploadStagerConfig() - stager = PineconeUploadStager(upload_stager_config=stager_config) - - new_upload_file = stager.run( - elements_filepath=upload_file, - output_dir=temp_dir, - output_filename=upload_file.name, - file_data=file_data, - ) - - # No need to create the namespace, as we expect this to fail - upload_config = PineconeUploaderConfig(namespace=namespace_to_fail) - uploader = PineconeUploader(connection_config=connection_config, upload_config=upload_config) - - # Precheck should pass overall, but the actual run might fail with the invalid namespace - uploader.precheck() - - try: - uploader.run(path=new_upload_file, file_data=file_data) - pytest.fail("Expected a failure when writing to a non-existent/invalid namespace.") - except DestinationConnectionError as e: - logger.info(f"Namespace write failure test passed: {e}") - - @requires_env(API_KEY) @pytest.mark.asyncio @pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG) From 9197bcc6e7ba5cf67f59560dfa45c16abc65b7e7 Mon Sep 17 00:00:00 2001 From: Mateusz Kuprowski Date: Tue, 7 Jan 2025 11:58:45 +0100 Subject: [PATCH 4/9] Small changes to vector verification --- test/integration/connectors/test_pinecone.py | 38 +++++++++++++------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/test/integration/connectors/test_pinecone.py b/test/integration/connectors/test_pinecone.py index 8ca838b00..38036e151 100644 --- a/test/integration/connectors/test_pinecone.py +++ b/test/integration/connectors/test_pinecone.py @@ -338,30 +338,42 @@ async def test_pinecone_namespace_write_success( uploader.run(path=new_upload_file, file_data=file_data) - # Validate the vectors in our test namespace - pinecone_client = Pinecone(api_key=get_api_key()) - index = pinecone_client.Index(name=pinecone_index) - - index_stats = index.describe_index_stats(namespace=test_namespace) - total_vectors_in_namespace = index_stats["total_vector_count"] - with new_upload_file.open() as f: staged_content = json.load(f) expected_num_of_vectors = len(staged_content) - assert total_vectors_in_namespace == expected_num_of_vectors, ( - f"Expected {expected_num_of_vectors} vectors in namespace '{test_namespace}', " - f"but found {total_vectors_in_namespace}." - ) - logger.info( - f"Successfully wrote {total_vectors_in_namespace} vectors to namespace '{test_namespace}'." + validate_pinecone_index( + index_name=pinecone_index, + expected_num_of_vectors=expected_num_of_vectors, + namespace="test_namespace_success", # or your test_namespace variable ) + # Validate the vectors in our test namespace + # pinecone_client = Pinecone(api_key=get_api_key()) + # index = pinecone_client.Index(name=pinecone_index) + + # index_stats = index.describe_index_stats(namespace=test_namespace) + # total_vectors_in_namespace = index_stats["total_vector_count"] + + # with new_upload_file.open() as f: + # staged_content = json.load(f) + # expected_num_of_vectors = len(staged_content) + + # assert total_vectors_in_namespace == expected_num_of_vectors, ( + # f"Expected {expected_num_of_vectors} vectors in namespace '{test_namespace}', " + # f"but found {total_vectors_in_namespace}." + # ) + # logger.info( + # f"Successfully wrote {total_vectors_in_namespace} vectors to namespace '{test_namespace}'." + # ) + # --- CLEANUP --- try: # Remove all vectors in our test namespace. # This effectively cleans up the namespace, even though you can't # literally delete a namespace from Pinecone. + pinecone_client = Pinecone(api_key=get_api_key()) + index = pinecone_client.Index(name=pinecone_index) delete_resp = index.delete(filter={}, namespace=test_namespace) logger.info(f"Cleaned up all vectors from namespace '{test_namespace}': {delete_resp}") except Exception as e: From fe19d32a4c826287f3a8c7e6576d0d1085c9f041 Mon Sep 17 00:00:00 2001 From: Mateusz Kuprowski Date: Tue, 7 Jan 2025 12:06:13 +0100 Subject: [PATCH 5/9] Ver bump --- unstructured_ingest/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unstructured_ingest/__version__.py b/unstructured_ingest/__version__.py index 5d08f71ba..4af193dbb 100644 --- a/unstructured_ingest/__version__.py +++ b/unstructured_ingest/__version__.py @@ -1 +1 @@ -__version__ = "0.3.12-dev4" # pragma: no cover +__version__ = "0.3.12-dev5" # pragma: no cover From 6f2bee68f21f948b70effdbd179cdb1e88a4bfee Mon Sep 17 00:00:00 2001 From: Mateusz Kuprowski Date: Tue, 7 Jan 2025 12:41:57 +0100 Subject: [PATCH 6/9] Fixed test cleanup stage --- test/integration/connectors/test_pinecone.py | 25 ++------------------ 1 file changed, 2 insertions(+), 23 deletions(-) diff --git a/test/integration/connectors/test_pinecone.py b/test/integration/connectors/test_pinecone.py index 38036e151..8e3e8643c 100644 --- a/test/integration/connectors/test_pinecone.py +++ b/test/integration/connectors/test_pinecone.py @@ -348,33 +348,12 @@ async def test_pinecone_namespace_write_success( namespace="test_namespace_success", # or your test_namespace variable ) - # Validate the vectors in our test namespace - # pinecone_client = Pinecone(api_key=get_api_key()) - # index = pinecone_client.Index(name=pinecone_index) - - # index_stats = index.describe_index_stats(namespace=test_namespace) - # total_vectors_in_namespace = index_stats["total_vector_count"] - - # with new_upload_file.open() as f: - # staged_content = json.load(f) - # expected_num_of_vectors = len(staged_content) - - # assert total_vectors_in_namespace == expected_num_of_vectors, ( - # f"Expected {expected_num_of_vectors} vectors in namespace '{test_namespace}', " - # f"but found {total_vectors_in_namespace}." - # ) - # logger.info( - # f"Successfully wrote {total_vectors_in_namespace} vectors to namespace '{test_namespace}'." - # ) - # --- CLEANUP --- try: - # Remove all vectors in our test namespace. - # This effectively cleans up the namespace, even though you can't - # literally delete a namespace from Pinecone. pinecone_client = Pinecone(api_key=get_api_key()) index = pinecone_client.Index(name=pinecone_index) - delete_resp = index.delete(filter={}, namespace=test_namespace) + # Use deleteAll=True to remove everything in that namespace + delete_resp = index.delete(deleteAll=True, namespace=test_namespace) logger.info(f"Cleaned up all vectors from namespace '{test_namespace}': {delete_resp}") except Exception as e: logger.error(f"Error cleaning up namespace '{test_namespace}': {e}") From 54a3cb273738b625dd94e4621fd37fb71184ae1d Mon Sep 17 00:00:00 2001 From: Mateusz Kuprowski Date: Tue, 7 Jan 2025 12:58:21 +0100 Subject: [PATCH 7/9] Linter fix --- test/integration/connectors/test_pinecone.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/test/integration/connectors/test_pinecone.py b/test/integration/connectors/test_pinecone.py index 8e3e8643c..92612e49e 100644 --- a/test/integration/connectors/test_pinecone.py +++ b/test/integration/connectors/test_pinecone.py @@ -299,6 +299,7 @@ def test_pinecone_stager( tmp_dir=tmp_path, ) + @requires_env(API_KEY) @pytest.mark.asyncio @pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG) @@ -343,9 +344,9 @@ async def test_pinecone_namespace_write_success( expected_num_of_vectors = len(staged_content) validate_pinecone_index( - index_name=pinecone_index, - expected_num_of_vectors=expected_num_of_vectors, - namespace="test_namespace_success", # or your test_namespace variable + index_name=pinecone_index, + expected_num_of_vectors=expected_num_of_vectors, + namespace="test_namespace_success", # or your test_namespace variable ) # --- CLEANUP --- From 0b7da52bfd81ae1197589f4d402f38cd54502bd1 Mon Sep 17 00:00:00 2001 From: Mateusz Kuprowski Date: Fri, 10 Jan 2025 14:50:00 +0100 Subject: [PATCH 8/9] Code review commit --- .../v2/processes/connectors/pinecone.py | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/unstructured_ingest/v2/processes/connectors/pinecone.py b/unstructured_ingest/v2/processes/connectors/pinecone.py index 08309e50c..76c85a5d6 100644 --- a/unstructured_ingest/v2/processes/connectors/pinecone.py +++ b/unstructured_ingest/v2/processes/connectors/pinecone.py @@ -178,9 +178,9 @@ def precheck(self): def pod_delete_by_record_id(self, file_data: FileData) -> None: """Deletion for Pinecone Pod-based index.""" logger.debug( - f"Deleting any content with metadata " + f"deleting any content with metadata " f"{self.upload_config.record_id_key}={file_data.identifier} " - f"from Pinecone pod index" + f"from pinecone pod index" ) index = self.connection_config.get_index(pool_threads=MAX_POOL_THREADS) @@ -188,22 +188,22 @@ def pod_delete_by_record_id(self, file_data: FileData) -> None: delete_kwargs = { "filter": {self.upload_config.record_id_key: {"$eq": file_data.identifier}}, } - if self.upload_config.namespace is not None: - delete_kwargs["namespace"] = self.upload_config.namespace + if namespace := self.upload_config.namespace: + delete_kwargs["namespace"] = namespace resp = index.delete(**delete_kwargs) logger.debug( - f"Deleted any content with metadata " + f"deleted any content with metadata " f"{self.upload_config.record_id_key}={file_data.identifier} " - f"from Pinecone index: {resp}" + f"from pinecone index: {resp}" ) def serverless_delete_by_record_id(self, file_data: FileData) -> None: """Deletion for Pinecone Serverless index.""" logger.debug( - f"Deleting any content with metadata " + f"deleting any content with metadata " f"{self.upload_config.record_id_key}={file_data.identifier} " - f"from Pinecone serverless index" + f"from pinecone serverless index" ) index = self.connection_config.get_index(pool_threads=MAX_POOL_THREADS) @@ -220,12 +220,12 @@ def serverless_delete_by_record_id(self, file_data: FileData) -> None: delete_kwargs["namespace"] = self.upload_config.namespace delete_resp = index.delete(**delete_kwargs) if delete_resp: - logger.error(f"Failed to delete batch of IDs: {delete_resp}") + logger.error(f"failed to delete batch of IDs: {delete_resp}") logger.info( - f"Deleted {deleted_ids} records with metadata " + f"deleted {deleted_ids} records with metadata " f"{self.upload_config.record_id_key}={file_data.identifier} " - f"from Pinecone index" + f"from pinecone index" ) @requires_dependencies(["pinecone"], extras="pinecone") From 55f49774b3858061365a46bbff19ef41e4ed39aa Mon Sep 17 00:00:00 2001 From: Mateusz Kuprowski Date: Fri, 10 Jan 2025 15:05:54 +0100 Subject: [PATCH 9/9] ver bump --- unstructured_ingest/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unstructured_ingest/__version__.py b/unstructured_ingest/__version__.py index dff5c63d1..f53758827 100644 --- a/unstructured_ingest/__version__.py +++ b/unstructured_ingest/__version__.py @@ -1 +1 @@ -__version__ = "0.3.13-dev2" # pragma: no cover +__version__ = "0.3.13-dev3" # pragma: no cover