Skip to content

Commit 69998b5

Browse files
authored
Add ids parameter for pinecone from_texts / add_texts (#659)
Allow optionally specifying a list of IDs for Pinecone vectors rather than always having them randomly generated. Because the vectors are written with an upsert, this also permits updating the embedding/metadata of existing Pinecone entries by ID.
1 parent 54d7f1c commit 69998b5

File tree

1 file changed

+10
-5
lines changed

1 file changed

+10
-5
lines changed

langchain/vectorstores/pinecone.py

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -54,13 +54,15 @@ def add_texts(
5454
self,
5555
texts: Iterable[str],
5656
metadatas: Optional[List[dict]] = None,
57+
ids: Optional[List[str]] = None,
5758
namespace: Optional[str] = None,
5859
) -> List[str]:
5960
"""Run more texts through the embeddings and add to the vectorstore.
6061
6162
Args:
6263
texts: Iterable of strings to add to the vectorstore.
6364
metadatas: Optional list of metadatas associated with the texts.
65+
ids: Optional list of ids to associate with the texts.
6466
namespace: Optional pinecone namespace to add the texts to.
6567
6668
Returns:
@@ -69,14 +71,12 @@ def add_texts(
6971
"""
7072
# Embed and create the documents
7173
docs = []
72-
ids = []
74+
ids = ids or [str(uuid.uuid4()) for _ in texts]
7375
for i, text in enumerate(texts):
74-
id = str(uuid.uuid4())
7576
embedding = self._embedding_function(text)
7677
metadata = metadatas[i] if metadatas else {}
7778
metadata[self._text_key] = text
78-
docs.append((id, embedding, metadata))
79-
ids.append(id)
79+
docs.append((ids[i], embedding, metadata))
8080
# upsert to Pinecone
8181
self._index.upsert(vectors=docs, namespace=namespace)
8282
return ids
@@ -153,6 +153,7 @@ def from_texts(
153153
texts: List[str],
154154
embedding: Embeddings,
155155
metadatas: Optional[List[dict]] = None,
156+
ids: Optional[List[str]] = None,
156157
batch_size: int = 32,
157158
text_key: str = "text",
158159
index_name: Optional[str] = None,
@@ -197,7 +198,11 @@ def from_texts(
197198
i_end = min(i + batch_size, len(texts))
198199
# get batch of texts and ids
199200
lines_batch = texts[i : i + batch_size]
200-
ids_batch = [str(uuid.uuid4()) for n in range(i, i_end)]
201+
# create ids if not provided
202+
if ids:
203+
ids_batch = ids[i : i + batch_size]
204+
else:
205+
ids_batch = [str(uuid.uuid4()) for n in range(i, i_end)]
201206
# create embeddings
202207
embeds = embedding.embed_documents(lines_batch)
203208
# prep metadata and upsert batch

0 commit comments

Comments
 (0)