Skip to content

Commit 923381e

Browse files
committed
Added sparse search example [skip ci]
1 parent 34513e0 commit 923381e

File tree

3 files changed

+72
-0
lines changed

3 files changed

+72
-0
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ Or check out some examples:
3535
- [Binary embeddings](examples/cohere/example.js) with Cohere
3636
- [Sentence embeddings](examples/transformers/example.js) with Transformers.js
3737
- [Hybrid search](examples/hybrid-search/example.js) with Transformers.js
38+
- [Sparse search](examples/sparse-search/example.js) with Text Embeddings Inference
3839
- [Morgan fingerprints](examples/rdkit/example.js) with RDKit.js
3940
- [Recommendations](examples/disco/example.js) with Disco
4041
- [Horizontal scaling](examples/citus/example.js) with Citus

examples/sparse-search/example.js

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
// good resources
2+
// https://opensearch.org/blog/improving-document-retrieval-with-sparse-semantic-encoders/
3+
// https://huggingface.co/opensearch-project/opensearch-neural-sparse-encoding-v1
4+
//
5+
// run with
6+
// text-embeddings-router --model-id opensearch-project/opensearch-neural-sparse-encoding-v1 --pooling splade
7+
8+
import pg from 'pg';
9+
import { SparseVector } from 'pgvector';
10+
import pgvector from 'pgvector/pg';
11+
12+
// Open a connection to the local example database.
const client = new pg.Client({ database: 'pgvector_example' });
await client.connect();

// Enable the vector extension, then register pgvector's types on this client.
await client.query('CREATE EXTENSION IF NOT EXISTS vector');
await pgvector.registerTypes(client);

// Recreate the documents table from scratch on every run.
const createDocumentsTable =
  'CREATE TABLE documents (id bigserial PRIMARY KEY, content text, embedding sparsevec(30522))';
await client.query('DROP TABLE IF EXISTS documents');
await client.query(createDocumentsTable);
20+
21+
/**
 * Requests sparse embeddings for a batch of texts from the embedding server.
 *
 * POSTs `{inputs}` as JSON to http://localhost:3000/embed_sparse and converts
 * each returned list of `{index, value}` entries into a plain object that maps
 * index to value.
 *
 * @param {string[]} inputs - Texts to embed, sent in a single request.
 * @returns {Promise<Object<string, number>[]>} One index-to-value object per
 *   item of the response, in response order.
 * @throws {Error} If the server responds with a non-OK status.
 * @throws {TypeError} If the response body is not an array of arrays.
 */
async function fetchEmbeddings(inputs) {
  const url = 'http://localhost:3000/embed_sparse';
  const response = await fetch(url, {
    method: 'POST',
    headers: {'Content-Type': 'application/json'},
    body: JSON.stringify({inputs}),
  });
  if (!response.ok) {
    throw new Error(`Bad status: ${response.status}`);
  }

  const json = await response.json();
  // Fail loudly on an unexpected payload: a JSON string is iterable too and
  // would otherwise be turned into meaningless embeddings without any error.
  if (!Array.isArray(json) || !json.every(Array.isArray)) {
    throw new TypeError('Unexpected response: expected an array of sparse embeddings');
  }

  return json.map((item) =>
    Object.fromEntries(item.map(({index, value}) => [index, value]))
  );
}
44+
45+
// Sample documents to index.
const input = [
  'The dog is barking',
  'The cat is purring',
  'The bear is growling'
];

try {
  // Embed every document in one request, then store each text with its embedding.
  const embeddings = await fetchEmbeddings(input);
  for (const [i, content] of input.entries()) {
    await client.query('INSERT INTO documents (content, embedding) VALUES ($1, $2)', [content, new SparseVector(embeddings[i], 30522)]);
  }

  // Embed the search text and print up to five documents ordered by the <#> operator.
  const query = 'forest';
  const queryEmbeddings = await fetchEmbeddings([query]);
  const { rows } = await client.query('SELECT content FROM documents ORDER BY embedding <#> $1 LIMIT 5', [new SparseVector(queryEmbeddings[0], 30522)]);
  for (const row of rows) {
    console.log(row.content);
  }
} finally {
  // Close the connection even when embedding, inserting or querying fails.
  await client.end();
}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
{
2+
"private": true,
3+
"type": "module",
4+
"dependencies": {
5+
"pg": "^8.11.3",
6+
"pgvector": "file:../.."
7+
}
8+
}

0 commit comments

Comments
 (0)