Improved examples [skip ci]

ankane · ankane · commit 0fbe9197634b · 2025-02-16T18:02:17.000-08:00
diff --git a/examples/cohere/example.js b/examples/cohere/example.js
@@ -11,7 +11,7 @@ await pgvector.registerTypes(client);
 await client.query('DROP TABLE IF EXISTS documents');
 await client.query('CREATE TABLE documents (id bigserial PRIMARY KEY, content text, embedding bit(1024))');
 
-async function fetchEmbeddings(texts, inputType) {
+async function embed(texts, inputType) {
   const cohere = new CohereClient();
   const response = await cohere.embed({
     texts: texts,
@@ -29,14 +29,14 @@ const input = [
   'The cat is purring',
   'The bear is growling'
 ];
-const embeddings = await fetchEmbeddings(input, 'search_document');
+const embeddings = await embed(input, 'search_document');
 for (let [i, content] of input.entries()) {
   await client.query('INSERT INTO documents (content, embedding) VALUES ($1, $2)', [content, embeddings[i]]);
 }
 
 const query = 'forest';
-const queryEmbedding = (await fetchEmbeddings([query], 'search_query'))[0];
-const { rows } = await client.query('SELECT * FROM documents ORDER BY embedding <~> $1 LIMIT 5', [queryEmbedding]);
+const queryEmbedding = (await embed([query], 'search_query'))[0];
+const { rows } = await client.query('SELECT content FROM documents ORDER BY embedding <~> $1 LIMIT 5', [queryEmbedding]);
 for (let row of rows) {
   console.log(row.content);
 }
diff --git a/examples/hybrid-search/example.js b/examples/hybrid-search/example.js
@@ -1,4 +1,4 @@
-import { pipeline } from '@xenova/transformers';
+import { pipeline } from '@huggingface/transformers';
 import pg from 'pg';
 import pgvector from 'pgvector/pg';
 
@@ -18,15 +18,15 @@ const input = [
   'The bear is growling'
 ];
 
-const extractor = await pipeline('feature-extraction', 'Xenova/multi-qa-MiniLM-L6-cos-v1');
+const extractor = await pipeline('feature-extraction', 'Xenova/multi-qa-MiniLM-L6-cos-v1', {dtype: 'fp32'});
 
-async function generateEmbedding(content) {
+async function embed(content) {
   const output = await extractor(content, {pooling: 'mean', normalize: true});
   return Array.from(output.data);
 }
 
 for (let content of input) {
-  const embedding = await generateEmbedding(content);
+  const embedding = await embed(content);
   await client.query('INSERT INTO documents (content, embedding) VALUES ($1, $2)', [content, pgvector.toSql(embedding)]);
 }
 
@@ -54,7 +54,7 @@ ORDER BY score DESC
 LIMIT 5
 `;
 const query = 'growling bear'
-const embedding = await generateEmbedding(query);
+const embedding = await embed(query);
 const k = 60
 const { rows } = await client.query(sql, [query, pgvector.toSql(embedding), k]);
 for (let row of rows) {
diff --git a/examples/hybrid-search/package.json b/examples/hybrid-search/package.json
@@ -2,7 +2,7 @@
     "private": true,
     "type": "module",
     "dependencies": {
-        "@xenova/transformers": "^2.6.0",
+        "@huggingface/transformers": "^3.3.3",
         "pg": "^8.11.3",
         "pgvector": "file:../.."
     }
diff --git a/examples/openai/example.js b/examples/openai/example.js
@@ -11,22 +11,25 @@ await pgvector.registerTypes(client);
 await client.query('DROP TABLE IF EXISTS documents');
 await client.query('CREATE TABLE documents (id bigserial PRIMARY KEY, content text, embedding vector(1536))');
 
+async function embed(input) {
+  const openai = new OpenAI();
+  const response = await openai.embeddings.create({input: input, model: 'text-embedding-3-small'});
+  return response.data.map((v) => v.embedding);
+}
+
 const input = [
   'The dog is barking',
   'The cat is purring',
   'The bear is growling'
 ];
-
-const openai = new OpenAI();
-const response = await openai.embeddings.create({input: input, model: 'text-embedding-3-small'});
-const embeddings = response.data.map((v) => v.embedding);
-
+const embeddings = await embed(input);
 for (let [i, content] of input.entries()) {
   await client.query('INSERT INTO documents (content, embedding) VALUES ($1, $2)', [content, pgvector.toSql(embeddings[i])]);
 }
 
-const documentId = 2;
-const { rows } = await client.query('SELECT * FROM documents WHERE id != $1 ORDER BY embedding <=> (SELECT embedding FROM documents WHERE id = $1) LIMIT 5', [documentId]);
+const query = 'forest';
+const queryEmbedding = (await embed([query]))[0];
+const { rows } = await client.query('SELECT content FROM documents ORDER BY embedding <=> $1 LIMIT 5', [pgvector.toSql(queryEmbedding)]);
 for (let row of rows) {
   console.log(row.content);
 }
diff --git a/examples/sparse-search/example.js b/examples/sparse-search/example.js
@@ -18,7 +18,7 @@ await pgvector.registerTypes(client);
 await client.query('DROP TABLE IF EXISTS documents');
 await client.query('CREATE TABLE documents (id bigserial PRIMARY KEY, content text, embedding sparsevec(30522))');
 
-async function fetchEmbeddings(inputs) {
+async function embed(inputs) {
   const url = 'http://localhost:3000/embed_sparse';
   const data = {inputs: inputs};
   const options = {
@@ -48,14 +48,14 @@ const input = [
   'The bear is growling'
 ];
 
-const embeddings = await fetchEmbeddings(input);
+const embeddings = await embed(input);
 for (let [i, content] of input.entries()) {
   await client.query('INSERT INTO documents (content, embedding) VALUES ($1, $2)', [content, new SparseVector(embeddings[i], 30522)]);
 }
 
 const query = 'forest';
-const queryEmbeddings = await fetchEmbeddings([query]);
-const { rows } = await client.query('SELECT content FROM documents ORDER BY embedding <#> $1 LIMIT 5', [new SparseVector(queryEmbeddings[0], 30522)]);
+const queryEmbedding = (await embed([query]))[0];
+const { rows } = await client.query('SELECT content FROM documents ORDER BY embedding <#> $1 LIMIT 5', [new SparseVector(queryEmbedding, 30522)]);
 for (let row of rows) {
   console.log(row.content);
 }

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -2,7 +2,7 @@` |
| `2` | `2` | `"private": true,` |
| `3` | `3` | `"type": "module",` |
| `4` | `4` | `"dependencies": {` |
| `5` | | `- "@xenova/transformers": "^2.6.0",` |
| | `5` | `+ "@huggingface/transformers": "^3.3.3",` |
| `6` | `6` | `"pg": "^8.11.3",` |
| `7` | `7` | `"pgvector": "file:../.."` |
| `8` | `8` | `}` |