Skip to content

Commit a5ee7de

Browse files
hwchase17Who828iocuydi
authored
pinecone changes (#590)
Co-authored-by: Smit Shah <[email protected]> Co-authored-by: iocuydi <[email protected]>
1 parent 7b6e7f6 commit a5ee7de

File tree

1 file changed

+70
-7
lines changed

1 file changed

+70
-7
lines changed

langchain/vectorstores/pinecone.py

Lines changed: 70 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from __future__ import annotations
33

44
import uuid
5-
from typing import Any, Callable, Iterable, List, Optional
5+
from typing import Any, Callable, Iterable, List, Optional, Tuple
66

77
from langchain.docstore.document import Document
88
from langchain.embeddings.base import Embeddings
@@ -46,16 +46,21 @@ def __init__(
4646
self._text_key = text_key
4747

4848
def add_texts(
49-
self, texts: Iterable[str], metadatas: Optional[List[dict]] = None
49+
self,
50+
texts: Iterable[str],
51+
metadatas: Optional[List[dict]] = None,
52+
namespace: Optional[str] = None,
5053
) -> List[str]:
5154
"""Run more texts through the embeddings and add to the vectorstore.
5255
5356
Args:
5457
texts: Iterable of strings to add to the vectorstore.
5558
metadatas: Optional list of metadatas associated with the texts.
59+
namespace: Optional pinecone namespace to add the texts to.
5660
5761
Returns:
5862
List of ids from adding the texts into the vectorstore.
63+
5964
"""
6065
# Embed and create the documents
6166
docs = []
@@ -68,14 +73,57 @@ def add_texts(
6873
docs.append((id, embedding, metadata))
6974
ids.append(id)
7075
# upsert to Pinecone
71-
self._index.upsert(vectors=docs)
76+
self._index.upsert(vectors=docs, namespace=namespace)
7277
return ids
7378

74-
def similarity_search(self, query: str, k: int = 5) -> List[Document]:
75-
"""Look up similar documents in pinecone."""
79+
def similarity_search_with_score(
80+
self,
81+
query: str,
82+
k: int = 5,
83+
namespace: Optional[str] = None,
84+
) -> List[Tuple[Document, float]]:
85+
"""Return pinecone documents most similar to query, along with scores.
86+
87+
Args:
88+
query: Text to look up documents similar to.
89+
k: Number of Documents to return. Defaults to 5.
90+
namespace: Namespace to search in. Default will search in '' namespace.
91+
92+
Returns:
93+
List of (Document, score) tuples for the Documents most similar to the query.
94+
"""
95+
query_obj = self._embedding_function(query)
96+
docs = []
97+
results = self._index.query(
98+
[query_obj], top_k=k, include_metadata=True, namespace=namespace
99+
)
100+
for res in results["matches"]:
101+
metadata = res["metadata"]
102+
text = metadata.pop(self._text_key)
103+
docs.append((Document(page_content=text, metadata=metadata), res["score"]))
104+
return docs
105+
106+
def similarity_search(
107+
self,
108+
query: str,
109+
k: int = 5,
110+
namespace: Optional[str] = None,
111+
) -> List[Document]:
112+
"""Return pinecone documents most similar to query.
113+
114+
Args:
115+
query: Text to look up documents similar to.
116+
k: Number of Documents to return. Defaults to 5.
117+
namespace: Namespace to search in. Default will search in '' namespace.
118+
119+
Returns:
120+
List of Documents most similar to the query.
121+
"""
76122
query_obj = self._embedding_function(query)
77123
docs = []
78-
results = self._index.query([query_obj], top_k=k, include_metadata=True)
124+
results = self._index.query(
125+
[query_obj], top_k=k, include_metadata=True, namespace=namespace
126+
)
79127
for res in results["matches"]:
80128
metadata = res["metadata"]
81129
text = metadata.pop(self._text_key)
@@ -132,7 +180,7 @@ def from_texts(
132180
i_end = min(i + batch_size, len(texts))
133181
# get batch of texts and ids
134182
lines_batch = texts[i : i + batch_size]
135-
ids_batch = [str(n) for n in range(i, i_end)]
183+
ids_batch = [str(uuid.uuid4()) for n in range(i, i_end)]
136184
# create embeddings
137185
embeds = embedding.embed_documents(lines_batch)
138186
# prep metadata and upsert batch
@@ -150,3 +198,18 @@ def from_texts(
150198
# upsert to Pinecone
151199
index.upsert(vectors=list(to_upsert), namespace=namespace)
152200
return cls(index, embedding.embed_query, text_key)
201+
202+
@classmethod
203+
def from_existing_index(
204+
cls, index_name: str, embedding: Embeddings, text_key: str = "text"
205+
) -> Pinecone:
206+
"""Load pinecone vectorstore from index name."""
207+
try:
208+
import pinecone
209+
except ImportError:
210+
raise ValueError(
211+
"Could not import pinecone python package. "
212+
"Please install it with `pip install pinecone-client`."
213+
)
214+
215+
return cls(pinecone.Index(index_name), embedding.embed_query, text_key)

0 commit comments

Comments
 (0)