Skip to content

Commit 44c3055

Browse files
committed
Refactor Pinecone and import statements in populate_index and llm_utils
- Reorganize import statements in populate_index.py and llm_utils.py
- Remove redundant Pinecone import in populate_index.py
- Improve code formatting and import order
- Minor code cleanup and optimization
1 parent 447d77c commit 44c3055

File tree

2 files changed

+21
-16
lines changed

2 files changed

+21
-16
lines changed

llm-complete-guide/steps/populate_index.py

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -46,14 +46,14 @@
4646
)
4747
from pgvector.psycopg2 import register_vector
4848
from PIL import Image, ImageDraw, ImageFont
49+
from pinecone import Pinecone, ServerlessSpec
4950
from sentence_transformers import SentenceTransformer
5051
from structures import Document
5152
from utils.llm_utils import get_db_conn, get_es_client, split_documents
5253
from zenml import ArtifactConfig, log_metadata, step
5354
from zenml.client import Client
5455
from zenml.metadata.metadata_types import Uri
55-
import pinecone
56-
from pinecone import Pinecone, ServerlessSpec
56+
5757
logging.basicConfig(level=logging.INFO)
5858
logger = logging.getLogger(__name__)
5959

@@ -829,8 +829,12 @@ def _index_generator_pinecone(documents: str) -> None:
829829
documents (str): JSON string containing the documents to index.
830830
"""
831831
client = Client()
832-
pinecone_api_key = client.get_secret(SECRET_NAME_PINECONE).secret_values["pinecone_api_key"]
833-
index_name = client.get_secret(SECRET_NAME_PINECONE).secret_values.get("pinecone_index", "zenml-docs")
832+
pinecone_api_key = client.get_secret(SECRET_NAME_PINECONE).secret_values[
833+
"pinecone_api_key"
834+
]
835+
index_name = client.get_secret(SECRET_NAME_PINECONE).secret_values.get(
836+
"pinecone_index", "zenml-docs"
837+
)
834838

835839
# Initialize Pinecone
836840
pc = Pinecone(api_key=pinecone_api_key)
@@ -841,10 +845,7 @@ def _index_generator_pinecone(documents: str) -> None:
841845
name=index_name,
842846
dimension=EMBEDDING_DIMENSIONALITY,
843847
metric="cosine",
844-
spec=ServerlessSpec(
845-
cloud="aws",
846-
region="us-east-1"
847-
)
848+
spec=ServerlessSpec(cloud="aws", region="us-east-1"),
848849
)
849850

850851
# Get the index
@@ -872,8 +873,8 @@ def _index_generator_pinecone(documents: str) -> None:
872873
"parent_section": doc["parent_section"],
873874
"url": doc["url"],
874875
"page_content": doc["page_content"],
875-
"token_count": doc["token_count"]
876-
}
876+
"token_count": doc["token_count"],
877+
},
877878
}
878879
batch.append(vector_record)
879880

@@ -886,7 +887,9 @@ def _index_generator_pinecone(documents: str) -> None:
886887
if batch:
887888
index.upsert(vectors=batch)
888889

889-
logger.info(f"Successfully indexed {len(docs)} documents to Pinecone index '{index_name}'")
890+
logger.info(
891+
f"Successfully indexed {len(docs)} documents to Pinecone index '{index_name}'"
892+
)
890893

891894

892895
def _log_metadata(index_type: IndexType) -> None:
@@ -930,7 +933,9 @@ def _log_metadata(index_type: IndexType) -> None:
930933
store_name = "pinecone"
931934
connection_details = {
932935
"api_key": "**********",
933-
"environment": client.get_secret(SECRET_NAME_PINECONE).secret_values["pinecone_env"],
936+
"environment": client.get_secret(
937+
SECRET_NAME_PINECONE
938+
).secret_values["pinecone_env"],
934939
}
935940

936941
log_metadata(

llm-complete-guide/utils/llm_utils.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,16 +19,16 @@
1919
# functionality
2020
# https://github.com/langchain-ai/langchain/blob/master/libs/text-splitters/langchain_text_splitters/character.py
2121

22+
import asyncio
2223
import logging
2324
import os
2425

25-
import asyncio
26+
import pinecone
2627
from elasticsearch import Elasticsearch
28+
from pinecone import Pinecone
2729
from zenml.client import Client
2830

2931
from utils.openai_utils import get_openai_api_key
30-
import pinecone
31-
from pinecone import Pinecone
3232

3333
# Configure logging levels for specific modules
3434
logging.getLogger("pytorch").setLevel(logging.CRITICAL)
@@ -40,7 +40,7 @@
4040
logging.getLogger().setLevel(logging.ERROR)
4141

4242
import re
43-
from typing import List, Tuple, Optional
43+
from typing import List, Optional, Tuple
4444

4545
# import litellm
4646
import numpy as np

0 commit comments

Comments
 (0)