Commit a786414

Merge pull request #161 from CodeForPhilly/feat/zbl-enahancedsearch
Feat/zbl enahancedsearch
2 parents 5a35719 + 3ba1142 commit a786414

12 files changed: 13,844 additions and 6,192 deletions

.gitignore

Lines changed: 6 additions & 2 deletions
@@ -20,9 +20,13 @@ node_modules
 #You need to get the db from the the linode store via the sync down script not from git.
 db_backups/
 
-
+REF REPO/
 
 .cursorignore
 
+SEMANTIC_SEARCH.md
+TODO.MD
+
+settings.local.json
 
-TODO.MD
+
lib/embeddings.js

Lines changed: 82 additions & 0 deletions
@@ -0,0 +1,82 @@
const { pipeline } = require('@xenova/transformers');

// Cache the model to avoid reloading
let embeddingModel = null;

/**
 * Initialize the embedding model
 * Uses a lightweight sentence transformer model optimized for search
 */
async function getEmbeddingModel() {
  if (!embeddingModel) {
    console.log('Loading embedding model...');
    embeddingModel = await pipeline(
      'feature-extraction',
      'Xenova/all-MiniLM-L6-v2' // Lightweight model, ~80MB, good for semantic search
    );
    console.log('Embedding model loaded');
  }
  return embeddingModel;
}

/**
 * Generate an embedding vector for a given text
 * @param {string} text - The text to embed
 * @returns {Promise<number[]>} - The embedding vector
 */
async function generateEmbedding(text) {
  const model = await getEmbeddingModel();
  const output = await model(text, {
    pooling: 'mean',
    normalize: true,
  });
  return Array.from(output.data);
}

/**
 * Calculate cosine similarity between two vectors
 * @param {number[]} vecA - First vector
 * @param {number[]} vecB - Second vector
 * @returns {number} - Cosine similarity score (0-1)
 */
function cosineSimilarity(vecA, vecB) {
  if (vecA.length !== vecB.length) {
    throw new Error('Vectors must have the same length');
  }

  let dotProduct = 0;
  let normA = 0;
  let normB = 0;

  for (let i = 0; i < vecA.length; i++) {
    dotProduct += vecA[i] * vecB[i];
    normA += vecA[i] * vecA[i];
    normB += vecB[i] * vecB[i];
  }

  return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
}

/**
 * Create a searchable text from plant data
 * @param {Object} plant - Plant document
 * @returns {string} - Combined searchable text
 */
function createSearchableText(plant) {
  const parts = [
    plant['Common Name'] || '',
    plant['Scientific Name'] || '',
  ].filter(Boolean);

  return parts.join(' ');
}

module.exports = {
  generateEmbedding,
  cosineSimilarity,
  createSearchableText,
  getEmbeddingModel,
};
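
For orientation, here is a minimal usage sketch showing how these exports could be combined to rank plant records against a free-text query. The searchPlants helper, the require path, and the sample plant data are illustrative assumptions, not part of this commit.

// Hypothetical usage sketch (not part of this commit): rank plant documents
// against a free-text query using the helpers exported by lib/embeddings.js.
const {
  generateEmbedding,
  cosineSimilarity,
  createSearchableText,
} = require('./lib/embeddings'); // path assumed relative to the repo root

async function searchPlants(query, plants) {
  // Embed the query once, then score every plant against it.
  const queryVector = await generateEmbedding(query);

  const scored = [];
  for (const plant of plants) {
    const plantVector = await generateEmbedding(createSearchableText(plant));
    scored.push({ plant, score: cosineSimilarity(queryVector, plantVector) });
  }

  // Highest cosine similarity first.
  return scored.sort((a, b) => b.score - a.score);
}

// Example call with hypothetical in-memory data.
searchPlants('shade-tolerant ground cover', [
  { 'Common Name': 'Wild Ginger', 'Scientific Name': 'Asarum canadense' },
  { 'Common Name': 'Butterfly Weed', 'Scientific Name': 'Asclepias tuberosa' },
]).then((results) => console.log(results));

In practice the per-plant embeddings would likely be precomputed and cached rather than regenerated on every query, since only the query vector changes between searches.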
