Added sparse search example [skip ci]

ankane · ankane · commit 923381edc211 · 2025-01-11T20:45:08.000-08:00
diff --git a/README.md b/README.md
@@ -35,6 +35,7 @@ Or check out some examples:
 - [Binary embeddings](examples/cohere/example.js) with Cohere
 - [Sentence embeddings](examples/transformers/example.js) with Transformers.js
 - [Hybrid search](examples/hybrid-search/example.js) with Transformers.js
+- [Sparse search](examples/sparse-search/example.js) with Text Embeddings Inference
 - [Morgan fingerprints](examples/rdkit/example.js) with RDKit.js
 - [Recommendations](examples/disco/example.js) with Disco
 - [Horizontal scaling](examples/citus/example.js) with Citus
diff --git a/examples/sparse-search/example.js b/examples/sparse-search/example.js
@@ -0,0 +1,63 @@
+// good resources
+// https://opensearch.org/blog/improving-document-retrieval-with-sparse-semantic-encoders/
+// https://huggingface.co/opensearch-project/opensearch-neural-sparse-encoding-v1
+//
+// run with
+// text-embeddings-router --model-id opensearch-project/opensearch-neural-sparse-encoding-v1 --pooling splade
+
+import pg from 'pg';
+import { SparseVector } from 'pgvector';
+import pgvector from 'pgvector/pg';
+
+// Connect to the local 'pgvector_example' database.
+const client = new pg.Client({database: 'pgvector_example'});
+await client.connect();
+
+// Make sure the vector extension exists, then register pgvector's types on this client.
+// NOTE(review): registerTypes presumably needs the extension to exist already - confirm before reordering.
+await client.query('CREATE EXTENSION IF NOT EXISTS vector');
+await pgvector.registerTypes(client);
+
+// Start from a clean table on every run.
+// NOTE(review): 30522 presumably matches the output dimension of the sparse encoding model - confirm.
+await client.query('DROP TABLE IF EXISTS documents');
+await client.query('CREATE TABLE documents (id bigserial PRIMARY KEY, content text, embedding sparsevec(30522))');
+
+async function fetchEmbeddings(inputs) {
+  const url = 'http://localhost:3000/embed_sparse';
+  const data = {inputs: inputs};
+  const options = {
+    method: 'POST',
+    headers: {'Content-Type': 'application/json'},
+    body: JSON.stringify(data)
+  };
+  const response = await fetch(url, options);
+  if (!response.ok) {
+    throw new Error(`Bad status: ${response.status}`);
+  }
+  const json = await response.json();
+  const embeddings = [];
+  for (let item of json) {
+    const embedding = {};
+    for (let e of item) {
+      embedding[e['index']] = e['value'];
+    }
+    embeddings.push(embedding);
+  }
+  return embeddings;
+}
+
+const input = [
+  'The dog is barking',
+  'The cat is purring',
+  'The bear is growling'
+];
+
+const embeddings = await fetchEmbeddings(input);
+for (let [i, content] of input.entries()) {
+  await client.query('INSERT INTO documents (content, embedding) VALUES ($1, $2)', [content, new SparseVector(embeddings[i], 30522)]);
+}
+
+const query = 'forest';
+const queryEmbeddings = await fetchEmbeddings([query]);
+const { rows } = await client.query('SELECT content FROM documents ORDER BY embedding <#> $1 LIMIT 5', [new SparseVector(queryEmbeddings[0], 30522)]);
+for (let row of rows) {
+  console.log(row.content);
+}
+
+await client.end();
diff --git a/examples/sparse-search/package.json b/examples/sparse-search/package.json
@@ -0,0 +1,8 @@
+{
+    "private": true,
+    "type": "module",
+    "dependencies": {
+        "pg": "^8.11.3",
+        "pgvector": "file:../.."
+    }
+}