2
2
from __future__ import annotations
3
3
4
4
import uuid
5
- from typing import Any , Callable , Iterable , List , Optional
5
+ from typing import Any , Callable , Iterable , List , Optional , Tuple
6
6
7
7
from langchain .docstore .document import Document
8
8
from langchain .embeddings .base import Embeddings
@@ -46,16 +46,21 @@ def __init__(
46
46
self ._text_key = text_key
47
47
48
48
def add_texts (
49
- self , texts : Iterable [str ], metadatas : Optional [List [dict ]] = None
49
+ self ,
50
+ texts : Iterable [str ],
51
+ metadatas : Optional [List [dict ]] = None ,
52
+ namespace : Optional [str ] = None ,
50
53
) -> List [str ]:
51
54
"""Run more texts through the embeddings and add to the vectorstore.
52
55
53
56
Args:
54
57
texts: Iterable of strings to add to the vectorstore.
55
58
metadatas: Optional list of metadatas associated with the texts.
59
+ namespace: Optional pinecone namespace to add the texts to.
56
60
57
61
Returns:
58
62
List of ids from adding the texts into the vectorstore.
63
+
59
64
"""
60
65
# Embed and create the documents
61
66
docs = []
@@ -68,14 +73,57 @@ def add_texts(
68
73
docs .append ((id , embedding , metadata ))
69
74
ids .append (id )
70
75
# upsert to Pinecone
71
- self ._index .upsert (vectors = docs )
76
+ self ._index .upsert (vectors = docs , namespace = namespace )
72
77
return ids
73
78
74
- def similarity_search (self , query : str , k : int = 5 ) -> List [Document ]:
75
- """Look up similar documents in pinecone."""
79
def similarity_search_with_score(
    self,
    query: str,
    k: int = 5,
    namespace: Optional[str] = None,
) -> List[Tuple[Document, float]]:
    """Return pinecone documents most similar to query, along with scores.

    Args:
        query: Text to look up documents similar to.
        k: Number of Documents to return. Defaults to 5.
        namespace: Namespace to search in. Passed through to the index
            query unchanged; defaults to None.

    Returns:
        List of ``(Document, score)`` tuples, one per match, in the order
        the index returned them.

    Raises:
        KeyError: If a match's metadata has no entry under the configured
            text key.
    """
    # Embed the query text with the same function used for stored texts.
    query_obj = self._embedding_function(query)
    results = self._index.query(
        [query_obj], top_k=k, include_metadata=True, namespace=namespace
    )
    docs = []
    for res in results["matches"]:
        metadata = res["metadata"]
        # The original text is stored inside the metadata under the text
        # key; pop it so it becomes page_content and is not duplicated in
        # the Document's metadata.
        text = metadata.pop(self._text_key)
        docs.append((Document(page_content=text, metadata=metadata), res["score"]))
    return docs
105
+
106
+ def similarity_search (
107
+ self ,
108
+ query : str ,
109
+ k : int = 5 ,
110
+ namespace : Optional [str ] = None ,
111
+ ) -> List [Document ]:
112
+ """Return pinecone documents most similar to query.
113
+
114
+ Args:
115
+ query: Text to look up documents similar to.
116
+ k: Number of Documents to return. Defaults to 4.
117
+ namespace: Namespace to search in. Default will search in '' namespace.
118
+
119
+ Returns:
120
+ List of Documents most similar to the query.
121
+ """
76
122
query_obj = self ._embedding_function (query )
77
123
docs = []
78
- results = self ._index .query ([query_obj ], top_k = k , include_metadata = True )
124
+ results = self ._index .query (
125
+ [query_obj ], top_k = k , include_metadata = True , namespace = namespace
126
+ )
79
127
for res in results ["matches" ]:
80
128
metadata = res ["metadata" ]
81
129
text = metadata .pop (self ._text_key )
@@ -132,7 +180,7 @@ def from_texts(
132
180
i_end = min (i + batch_size , len (texts ))
133
181
# get batch of texts and ids
134
182
lines_batch = texts [i : i + batch_size ]
135
- ids_batch = [str (n ) for n in range (i , i_end )]
183
+ ids_batch = [str (uuid . uuid4 () ) for n in range (i , i_end )]
136
184
# create embeddings
137
185
embeds = embedding .embed_documents (lines_batch )
138
186
# prep metadata and upsert batch
@@ -150,3 +198,18 @@ def from_texts(
150
198
# upsert to Pinecone
151
199
index .upsert (vectors = list (to_upsert ), namespace = namespace )
152
200
return cls (index , embedding .embed_query , text_key )
201
+
202
@classmethod
def from_existing_index(
    cls, index_name: str, embedding: Embeddings, text_key: str = "text"
) -> Pinecone:
    """Load pinecone vectorstore from index name.

    Args:
        index_name: Name handed to ``pinecone.Index`` to obtain the index.
        embedding: Embeddings object; its ``embed_query`` method is passed
            to the constructor as the embedding function.
        text_key: Key passed to the constructor as the text key.
            Defaults to "text".

    Returns:
        A new instance of this class wrapping the named index.

    Raises:
        ValueError: If the ``pinecone`` package cannot be imported.
    """
    # Import lazily so the package is only required when actually used.
    try:
        import pinecone
    except ImportError as err:
        # Keep ValueError for callers that already catch it, but chain the
        # original ImportError so the root cause stays in the traceback.
        raise ValueError(
            "Could not import pinecone python package. "
            "Please install it with `pip install pinecone-client`."
        ) from err

    return cls(pinecone.Index(index_name), embedding.embed_query, text_key)
0 commit comments