4646)
4747from pgvector .psycopg2 import register_vector
4848from PIL import Image , ImageDraw , ImageFont
49+ from pinecone import Pinecone , ServerlessSpec
4950from sentence_transformers import SentenceTransformer
5051from structures import Document
5152from utils .llm_utils import get_db_conn , get_es_client , split_documents
5253from zenml import ArtifactConfig , log_metadata , step
5354from zenml .client import Client
5455from zenml .metadata .metadata_types import Uri
55- import pinecone
56- from pinecone import Pinecone , ServerlessSpec
56+
5757logging .basicConfig (level = logging .INFO )
5858logger = logging .getLogger (__name__ )
5959
@@ -829,8 +829,12 @@ def _index_generator_pinecone(documents: str) -> None:
829829 documents (str): JSON string containing the documents to index.
830830 """
831831 client = Client ()
832- pinecone_api_key = client .get_secret (SECRET_NAME_PINECONE ).secret_values ["pinecone_api_key" ]
833- index_name = client .get_secret (SECRET_NAME_PINECONE ).secret_values .get ("pinecone_index" , "zenml-docs" )
832+ pinecone_api_key = client .get_secret (SECRET_NAME_PINECONE ).secret_values [
833+ "pinecone_api_key"
834+ ]
835+ index_name = client .get_secret (SECRET_NAME_PINECONE ).secret_values .get (
836+ "pinecone_index" , "zenml-docs"
837+ )
834838
835839 # Initialize Pinecone
836840 pc = Pinecone (api_key = pinecone_api_key )
@@ -841,10 +845,7 @@ def _index_generator_pinecone(documents: str) -> None:
841845 name = index_name ,
842846 dimension = EMBEDDING_DIMENSIONALITY ,
843847 metric = "cosine" ,
844- spec = ServerlessSpec (
845- cloud = "aws" ,
846- region = "us-east-1"
847- )
848+ spec = ServerlessSpec (cloud = "aws" , region = "us-east-1" ),
848849 )
849850
850851 # Get the index
@@ -872,8 +873,8 @@ def _index_generator_pinecone(documents: str) -> None:
872873 "parent_section" : doc ["parent_section" ],
873874 "url" : doc ["url" ],
874875 "page_content" : doc ["page_content" ],
875- "token_count" : doc ["token_count" ]
876- }
876+ "token_count" : doc ["token_count" ],
877+ },
877878 }
878879 batch .append (vector_record )
879880
@@ -886,7 +887,9 @@ def _index_generator_pinecone(documents: str) -> None:
886887 if batch :
887888 index .upsert (vectors = batch )
888889
889- logger .info (f"Successfully indexed { len (docs )} documents to Pinecone index '{ index_name } '" )
890+ logger .info (
891+ f"Successfully indexed { len (docs )} documents to Pinecone index '{ index_name } '"
892+ )
890893
891894
892895def _log_metadata (index_type : IndexType ) -> None :
@@ -930,7 +933,9 @@ def _log_metadata(index_type: IndexType) -> None:
930933 store_name = "pinecone"
931934 connection_details = {
932935 "api_key" : "**********" ,
933- "environment" : client .get_secret (SECRET_NAME_PINECONE ).secret_values ["pinecone_env" ],
936+ "environment" : client .get_secret (
937+ SECRET_NAME_PINECONE
938+ ).secret_values ["pinecone_env" ],
934939 }
935940
936941 log_metadata (
0 commit comments