Skip to content

Commit b24d774

Browse files
authored
Fixes #4242: The Pinecone APOC implementation is misleading (#4250)
* Fixes #4242: The Pinecone APOC implementation is misleading * Changes review pinecone.adoc
1 parent dcba786 commit b24d774

File tree

6 files changed

+89
-82
lines changed

6 files changed

+89
-82
lines changed

docs/asciidoc/modules/ROOT/pages/database-integration/vectordb/pinecone.adoc

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,33 @@
11

22
= Pinecone
33

4+
[NOTE]
5+
====
6+
In Pinecone a collection is a static and non-queryable copy of an index,
7+
therefore, unlike other vector dbs, the Pinecone procedures work on indexes instead of collections.
8+
9+
However, the vectordb procedures to handle CRUD operations on collections are usually named `apoc.vectordb.<vdbname>.createCollection` and `apoc.vectordb.<vdbname>.deleteCollection`,
10+
so to be consistent, the Pinecone index procedures are named `apoc.vectordb.pinecone.createCollection` and `apoc.vectordb.pinecone.deleteCollection`.
11+
====
12+
413
Here is a list of all available Pinecone procedures:
514

615
[opts=header, cols="1, 3"]
716
|===
817
| name | description
9-
| apoc.vectordb.pinecone.info(hostOrKey, collection, $config) | Get information about the specified existing collection or throws a 404 error if it does not exist
18+
| apoc.vectordb.pinecone.info(hostOrKey, index, $config) | Get information about the specified existing index or throws a 404 error if it does not exist
1019
| apoc.vectordb.pinecone.createCollection(hostOrKey, index, similarity, size, $config) |
1120
Creates an index, with the name specified in the 2nd parameter, and with the specified `similarity` and `size`.
1221
The default endpoint is `<hostOrKey param>/indexes`.
1322
| apoc.vectordb.pinecone.deleteCollection(hostOrKey, index, $config) |
1423
Deletes an index with the name specified in the 2nd parameter.
15-
The default endpoint is `<hostOrKey param>/indexes/<collection param>`.
24+
The default endpoint is `<hostOrKey param>/indexes/<index param>`.
1625
| apoc.vectordb.pinecone.upsert(hostOrKey, index, vectors, $config) |
1726
Upserts, in the index with the name specified in the 2nd parameter, the vectors [{id: 'id', vector: '<vectorDb>', metadata: '<metadata>'}].
1827
The default endpoint is `<hostOrKey param>/vectors/upsert`.
1928
| apoc.vectordb.pinecone.delete(hostOrKey, index, ids, $config) |
2029
Delete the vectors with the specified `ids`.
21-
The default endpoint is `<hostOrKey param>/indexes/<collection param>`.
30+
The default endpoint is `<hostOrKey param>/indexes/<index param>`.
2231
| apoc.vectordb.pinecone.get(hostOrKey, index, ids, $config) |
2332
Get the vectors with the specified `ids`.
2433
The default endpoint is `<hostOrKey param>/vectors/fetch`.
@@ -35,15 +44,6 @@ Here is a list of all available Pinecone procedures:
3544

3645
where the 1st parameter can be a key defined by the apoc config `apoc.pinecone.<key>.host=myHost`.
3746

38-
[NOTE]
39-
====
40-
The procedures create/drop/handle an index, instead of a collection like the other vectordb procedures,
41-
since in Pinecone a collection is a static and non-queryable copy of an index.
42-
43-
Anyway, the create / delete index procedures are named `.createCollection` and `.deleteCollection` to be consistent with the other.
44-
====
45-
46-
4747
The default `hostOrKey` is `"https://api.pinecone.io"`,
4848
therefore it can generally be null with the `createCollection` and `deleteCollection` procedures,
4949
and equal to the host name, with the other ones, that is, the one indicated in the Pinecone dashboard:
@@ -55,10 +55,10 @@ image::pinecone-index.png[width=800]
5555

5656
The following examples assume we want to create and manage an index called `test-index`.
5757

58-
.Get collection info (it leverages https://docs.pinecone.io/reference/api/control-plane/describe_collection[this API])
58+
.Get index info (it leverages https://docs.pinecone.io/guides/indexes/view-index-information[this API])
5959
[source,cypher]
6060
----
61-
CALL apoc.vectordb.pinecone.info(hostOrKey, 'test-collection', {<optional config>})
61+
CALL apoc.vectordb.pinecone.info(hostOrKey, 'test-index', {<optional config>})
6262
----
6363

6464
.Example results
@@ -67,7 +67,7 @@ CALL apoc.vectordb.pinecone.info(hostOrKey, 'test-collection', {<optional config
6767
| value
6868
| { "dimension": 3,
6969
"environment": "us-east1-gcp",
70-
"name": "tiny-collection",
70+
"name": "tiny-index",
7171
"size": 3126700,
7272
"status": "Ready",
7373
"vector_count": 99
@@ -262,7 +262,7 @@ It is possible to execute vector db procedures together with the xref::ml/rag.ad
262262

263263
[source,cypher]
264264
----
265-
CALL apoc.vectordb.pinecone.getAndUpdate($host, $collection, [<id1>, <id2>], $conf) YIELD node, metadata, id, vector
265+
CALL apoc.vectordb.pinecone.getAndUpdate($host, $index, [<id1>, <id2>], $conf) YIELD node, metadata, id, vector
266266
WITH collect(node) as paths
267267
CALL apoc.ml.rag(paths, $attributes, $question, $confPrompt) YIELD value
268268
RETURN value

extended/src/main/java/apoc/vectordb/Pinecone.java

Lines changed: 37 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -43,12 +43,12 @@ public class Pinecone {
4343
public URLAccessChecker urlAccessChecker;
4444

4545
@Procedure("apoc.vectordb.pinecone.info")
46-
@Description("apoc.vectordb.pinecone.info(hostOrKey, collection, $configuration) - Get information about the specified existing collection or throws an error if it does not exist")
46+
@Description("apoc.vectordb.pinecone.info(hostOrKey, index, $configuration) - Get information about the specified existing index or throws an error if it does not exist")
4747
public Stream<MapResult> getInfo(@Name("hostOrKey") String hostOrKey,
48-
@Name("collection") String collection,
48+
@Name("index") String index,
4949
@Name(value = "configuration", defaultValue = "{}") Map<String, Object> configuration) throws Exception {
50-
String url = "%s/collections/%s";
51-
Map<String, Object> config = getVectorDbInfo(hostOrKey, collection, configuration, url);
50+
String url = "%s/indexes/%s";
51+
Map<String, Object> config = getVectorDbInfo(hostOrKey, index, configuration, url);
5252

5353
methodAndPayloadNull(config);
5454

@@ -59,18 +59,18 @@ public Stream<MapResult> getInfo(@Name("hostOrKey") String hostOrKey,
5959
}
6060

6161
@Procedure("apoc.vectordb.pinecone.createCollection")
62-
@Description("apoc.vectordb.pinecone.createCollection(hostOrKey, collection, similarity, size, $configuration) - Creates a collection, with the name specified in the 2nd parameter, and with the specified `similarity` and `size`")
62+
@Description("apoc.vectordb.pinecone.createCollection(hostOrKey, index, similarity, size, $configuration) - Creates a index, with the name specified in the 2nd parameter, and with the specified `similarity` and `size`")
6363
public Stream<MapResult> createCollection(@Name("hostOrKey") String hostOrKey,
64-
@Name("collection") String collection,
64+
@Name("index") String index,
6565
@Name("similarity") String similarity,
6666
@Name("size") Long size,
6767
@Name(value = "configuration", defaultValue = "{}") Map<String, Object> configuration) throws Exception {
6868
String url = "%s/indexes";
69-
Map<String, Object> config = getVectorDbInfo(hostOrKey, collection, configuration, url);
69+
Map<String, Object> config = getVectorDbInfo(hostOrKey, index, configuration, url);
7070
config.putIfAbsent(METHOD_KEY, "POST");
7171

7272
Map<String, Object> additionalBodies = Map.of(
73-
"name", collection,
73+
"name", index,
7474
"dimension", size,
7575
"metric", similarity
7676
);
@@ -81,14 +81,14 @@ public Stream<MapResult> createCollection(@Name("hostOrKey") String hostOrKey,
8181
}
8282

8383
@Procedure("apoc.vectordb.pinecone.deleteCollection")
84-
@Description("apoc.vectordb.pinecone.deleteCollection(hostOrKey, collection, $configuration) - Deletes a collection with the name specified in the 2nd parameter")
84+
@Description("apoc.vectordb.pinecone.deleteCollection(hostOrKey, index, $configuration) - Deletes a index with the name specified in the 2nd parameter")
8585
public Stream<MapResult> deleteCollection(
8686
@Name("hostOrKey") String hostOrKey,
87-
@Name("collection") String collection,
87+
@Name("index") String index,
8888
@Name(value = "configuration", defaultValue = "{}") Map<String, Object> configuration) throws Exception {
8989

9090
String url = "%s/indexes/%s";
91-
Map<String, Object> config = getVectorDbInfo(hostOrKey, collection, configuration, url);
91+
Map<String, Object> config = getVectorDbInfo(hostOrKey, index, configuration, url);
9292
config.putIfAbsent(METHOD_KEY, "DELETE");
9393

9494
RestAPIConfig restAPIConfig = new RestAPIConfig(config);
@@ -98,16 +98,16 @@ public Stream<MapResult> deleteCollection(
9898
}
9999

100100
@Procedure("apoc.vectordb.pinecone.upsert")
101-
@Description("apoc.vectordb.pinecone.upsert(hostOrKey, collection, vectors, $configuration) - Upserts, in the collection with the name specified in the 2nd parameter, the vectors [{id: 'id', vector: '<vectorDb>', medatada: '<metadata>'}]")
101+
@Description("apoc.vectordb.pinecone.upsert(hostOrKey, index, vectors, $configuration) - Upserts, in the index with the name specified in the 2nd parameter, the vectors [{id: 'id', vector: '<vectorDb>', medatada: '<metadata>'}]")
102102
public Stream<MapResult> upsert(
103103
@Name("hostOrKey") String hostOrKey,
104-
@Name("collection") String collection,
104+
@Name("index") String index,
105105
@Name("vectors") List<Map<String, Object>> vectors,
106106
@Name(value = "configuration", defaultValue = "{}") Map<String, Object> configuration) throws Exception {
107107

108108
String url = "%s/vectors/upsert";
109109

110-
Map<String, Object> config = getVectorDbInfo(hostOrKey, collection, configuration, url);
110+
Map<String, Object> config = getVectorDbInfo(hostOrKey, index, configuration, url);
111111
config.putIfAbsent(METHOD_KEY, "POST");
112112

113113
vectors = vectors.stream()
@@ -126,15 +126,15 @@ public Stream<MapResult> upsert(
126126
}
127127

128128
@Procedure("apoc.vectordb.pinecone.delete")
129-
@Description("apoc.vectordb.pinecone.delete(hostOrKey, collection, ids, $configuration) - Delete the vectors with the specified `ids`")
129+
@Description("apoc.vectordb.pinecone.delete(hostOrKey, index, ids, $configuration) - Delete the vectors with the specified `ids`")
130130
public Stream<MapResult> delete(
131131
@Name("hostOrKey") String hostOrKey,
132-
@Name("collection") String collection,
132+
@Name("index") String index,
133133
@Name("vectors") List<Object> ids,
134134
@Name(value = "configuration", defaultValue = "{}") Map<String, Object> configuration) throws Exception {
135135

136136
String url = "%s/vectors/delete";
137-
Map<String, Object> config = getVectorDbInfo(hostOrKey, collection, configuration, url);
137+
Map<String, Object> config = getVectorDbInfo(hostOrKey, index, configuration, url);
138138
config.putIfAbsent(METHOD_KEY, "POST");
139139

140140
Map<String, Object> additionalBodies = Map.of("ids", ids);
@@ -145,29 +145,29 @@ public Stream<MapResult> delete(
145145
}
146146

147147
@Procedure(value = "apoc.vectordb.pinecone.get")
148-
@Description("apoc.vectordb.pinecone.get(hostOrKey, collection, ids, $configuration) - Get the vectors with the specified `ids`")
148+
@Description("apoc.vectordb.pinecone.get(hostOrKey, index, ids, $configuration) - Get the vectors with the specified `ids`")
149149
public Stream<VectorDbUtil.EmbeddingResult> get(@Name("hostOrKey") String hostOrKey,
150-
@Name("collection") String collection,
150+
@Name("index") String index,
151151
@Name("ids") List<Object> ids,
152152
@Name(value = "configuration", defaultValue = "{}") Map<String, Object> configuration) throws Exception {
153153
setReadOnlyMappingMode(configuration);
154-
return getCommon(hostOrKey, collection, ids, configuration);
154+
return getCommon(hostOrKey, index, ids, configuration);
155155
}
156156

157157
@Procedure(value = "apoc.vectordb.pinecone.getAndUpdate", mode = Mode.WRITE)
158-
@Description("apoc.vectordb.pinecone.getAndUpdate(hostOrKey, collection, ids, $configuration) - Get the vectors with the specified `ids`")
158+
@Description("apoc.vectordb.pinecone.getAndUpdate(hostOrKey, index, ids, $configuration) - Get the vectors with the specified `ids`")
159159
public Stream<VectorDbUtil.EmbeddingResult> getAndUpdate(@Name("hostOrKey") String hostOrKey,
160-
@Name("collection") String collection,
160+
@Name("index") String index,
161161
@Name("ids") List<Object> ids,
162162
@Name(value = "configuration", defaultValue = "{}") Map<String, Object> configuration) throws Exception {
163-
return getCommon(hostOrKey, collection, ids, configuration);
163+
return getCommon(hostOrKey, index, ids, configuration);
164164
}
165165

166-
private Stream<VectorDbUtil.EmbeddingResult> getCommon(String hostOrKey, String collection, List<Object> ids, Map<String, Object> configuration) throws Exception {
166+
private Stream<VectorDbUtil.EmbeddingResult> getCommon(String hostOrKey, String index, List<Object> ids, Map<String, Object> configuration) throws Exception {
167167
String url = "%s/vectors/fetch";
168-
Map<String, Object> config = getVectorDbInfo(hostOrKey, collection, configuration, url);
168+
Map<String, Object> config = getVectorDbInfo(hostOrKey, index, configuration, url);
169169

170-
VectorEmbeddingConfig conf = DB_HANDLER.getEmbedding().fromGet(config, procedureCallContext, ids, collection);
170+
VectorEmbeddingConfig conf = DB_HANDLER.getEmbedding().fromGet(config, procedureCallContext, ids, index);
171171

172172
return getEmbeddingResultStream(conf, procedureCallContext, urlAccessChecker, tx,
173173
v -> {
@@ -178,33 +178,33 @@ private Stream<VectorDbUtil.EmbeddingResult> getCommon(String hostOrKey, String
178178
}
179179

180180
@Procedure(value = "apoc.vectordb.pinecone.query")
181-
@Description("apoc.vectordb.pinecone.query(hostOrKey, collection, vector, filter, limit, $configuration) - Retrieve closest vectors the the defined `vector`, `limit` of results, in the collection with the name specified in the 2nd parameter")
181+
@Description("apoc.vectordb.pinecone.query(hostOrKey, index, vector, filter, limit, $configuration) - Retrieve closest vectors the the defined `vector`, `limit` of results, in the index with the name specified in the 2nd parameter")
182182
public Stream<VectorDbUtil.EmbeddingResult> query(@Name("hostOrKey") String hostOrKey,
183-
@Name("collection") String collection,
183+
@Name("index") String index,
184184
@Name(value = "vector", defaultValue = "[]") List<Double> vector,
185185
@Name(value = "filter", defaultValue = "{}") Map<String, Object> filter,
186186
@Name(value = "limit", defaultValue = "10") long limit,
187187
@Name(value = "configuration", defaultValue = "{}") Map<String, Object> configuration) throws Exception {
188188
setReadOnlyMappingMode(configuration);
189-
return queryCommon(hostOrKey, collection, vector, filter, limit, configuration);
189+
return queryCommon(hostOrKey, index, vector, filter, limit, configuration);
190190
}
191191

192192
@Procedure(value = "apoc.vectordb.pinecone.queryAndUpdate", mode = Mode.WRITE)
193-
@Description("apoc.vectordb.pinecone.queryAndUpdate(hostOrKey, collection, vector, filter, limit, $configuration) - Retrieve closest vectors the the defined `vector`, `limit` of results, in the collection with the name specified in the 2nd parameter")
193+
@Description("apoc.vectordb.pinecone.queryAndUpdate(hostOrKey, index, vector, filter, limit, $configuration) - Retrieve closest vectors the the defined `vector`, `limit` of results, in the index with the name specified in the 2nd parameter")
194194
public Stream<VectorDbUtil.EmbeddingResult> queryAndUpdate(@Name("hostOrKey") String hostOrKey,
195-
@Name("collection") String collection,
195+
@Name("index") String index,
196196
@Name(value = "vector", defaultValue = "[]") List<Double> vector,
197197
@Name(value = "filter", defaultValue = "{}") Map<String, Object> filter,
198198
@Name(value = "limit", defaultValue = "10") long limit,
199199
@Name(value = "configuration", defaultValue = "{}") Map<String, Object> configuration) throws Exception {
200-
return queryCommon(hostOrKey, collection, vector, filter, limit, configuration);
200+
return queryCommon(hostOrKey, index, vector, filter, limit, configuration);
201201
}
202202

203-
private Stream<VectorDbUtil.EmbeddingResult> queryCommon(String hostOrKey, String collection, List<Double> vector, Map<String, Object> filter, long limit, Map<String, Object> configuration) throws Exception {
203+
private Stream<VectorDbUtil.EmbeddingResult> queryCommon(String hostOrKey, String index, List<Double> vector, Map<String, Object> filter, long limit, Map<String, Object> configuration) throws Exception {
204204
String url = "%s/query";
205-
Map<String, Object> config = getVectorDbInfo(hostOrKey, collection, configuration, url);
205+
Map<String, Object> config = getVectorDbInfo(hostOrKey, index, configuration, url);
206206

207-
VectorEmbeddingConfig conf = DB_HANDLER.getEmbedding().fromQuery(config, procedureCallContext, vector, filter, limit, collection);
207+
VectorEmbeddingConfig conf = DB_HANDLER.getEmbedding().fromQuery(config, procedureCallContext, vector, filter, limit, index);
208208

209209
return getEmbeddingResultStream(conf, procedureCallContext, urlAccessChecker, tx,
210210
v -> {
@@ -215,7 +215,7 @@ private Stream<VectorDbUtil.EmbeddingResult> queryCommon(String hostOrKey, Strin
215215
}
216216

217217
private Map<String, Object> getVectorDbInfo(
218-
String hostOrKey, String collection, Map<String, Object> configuration, String templateUrl) {
219-
return getCommonVectorDbInfo(hostOrKey, collection, configuration, templateUrl, DB_HANDLER);
218+
String hostOrKey, String index, Map<String, Object> configuration, String templateUrl) {
219+
return getCommonVectorDbInfo(hostOrKey, index, configuration, templateUrl, DB_HANDLER);
220220
}
221221
}

extended/src/main/java/apoc/vectordb/PineconeHandler.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ static class PineconeEmbeddingHandler implements VectorEmbeddingHandler {
5353
* that makes the request to respond 200 OK, but returns an empty result
5454
*/
5555
@Override
56-
public <T> VectorEmbeddingConfig fromGet(Map<String, Object> config, ProcedureCallContext procedureCallContext, List<T> ids, String collection) {
56+
public <T> VectorEmbeddingConfig fromGet(Map<String, Object> config, ProcedureCallContext procedureCallContext, List<T> ids, String index) {
5757
List<String> fields = procedureCallContext.outputFields().toList();
5858

5959
config.put(BODY_KEY, null);
@@ -74,7 +74,7 @@ public <T> VectorEmbeddingConfig fromGet(Map<String, Object> config, ProcedureCa
7474
}
7575

7676
@Override
77-
public VectorEmbeddingConfig fromQuery(Map<String, Object> config, ProcedureCallContext procedureCallContext, List<Double> vector, Object filter, long limit, String collection) {
77+
public VectorEmbeddingConfig fromQuery(Map<String, Object> config, ProcedureCallContext procedureCallContext, List<Double> vector, Object filter, long limit, String index) {
7878
List<String> fields = procedureCallContext.outputFields().toList();
7979

8080
Map<String, Object> additionalBodies = map("vector", vector,

0 commit comments

Comments
 (0)