|
| 1 | +const { pipeline } = require('@xenova/transformers'); |
| 2 | + |
| 3 | +// Cache the model to avoid reloading |
| 4 | +let embeddingModel = null; |
| 5 | + |
/**
 * Lazily load the shared feature-extraction pipeline.
 *
 * The in-flight load promise itself is stored in the module-level
 * `embeddingModel` cache, so callers that arrive while the model is still
 * loading share that single load instead of each starting their own.
 * If loading fails, the cache is cleared so that a later call can retry.
 *
 * @returns {Promise<Function>} Resolves to the feature-extraction pipeline.
 */
async function getEmbeddingModel() {
  if (!embeddingModel) {
    console.log('Loading embedding model...');
    const loading = (async () => {
      const model = await pipeline(
        'feature-extraction',
        'Xenova/all-MiniLM-L6-v2' // Lightweight model, ~80MB, good for semantic search
      );
      console.log('Embedding model loaded');
      return model;
    })();

    // Publish the promise before anything awaits it, so concurrent callers
    // see a populated cache and reuse this load.
    embeddingModel = loading;

    // Do not keep a rejected promise cached forever: drop it so the next
    // call starts a fresh load. The rejection still reaches the callers
    // through the promise returned below.
    loading.catch(() => {
      if (embeddingModel === loading) {
        embeddingModel = null;
      }
    });
  }
  return embeddingModel;
}
| 21 | + |
/**
 * Embed a piece of text with the shared feature-extraction model.
 *
 * The model is called with mean pooling and normalization enabled, and the
 * `data` field of its output is copied into a plain array.
 *
 * @param {string} text - The text to embed
 * @returns {Promise<number[]>} - The embedding vector as a plain array
 */
async function generateEmbedding(text) {
  const extractor = await getEmbeddingModel();
  const extractionOptions = { pooling: 'mean', normalize: true };
  const { data } = await extractor(text, extractionOptions);
  return Array.from(data);
}
| 35 | + |
/**
 * Calculate cosine similarity between two vectors.
 *
 * The result lies in [-1, 1]: 1 for vectors pointing the same way, 0 for
 * orthogonal vectors, -1 for opposite vectors. When either vector has zero
 * magnitude (including two empty vectors) the direction is undefined and
 * 0 is returned instead of NaN, so scores stay safe to compare and sort.
 *
 * @param {number[]} vecA - First vector
 * @param {number[]} vecB - Second vector
 * @returns {number} - Cosine similarity score in [-1, 1]
 * @throws {Error} If the vectors differ in length
 */
function cosineSimilarity(vecA, vecB) {
  if (vecA.length !== vecB.length) {
    throw new Error('Vectors must have the same length');
  }

  let dotProduct = 0;
  let normA = 0;
  let normB = 0;

  for (let i = 0; i < vecA.length; i++) {
    dotProduct += vecA[i] * vecB[i];
    normA += vecA[i] * vecA[i];
    normB += vecB[i] * vecB[i];
  }

  const magnitude = Math.sqrt(normA) * Math.sqrt(normB);
  // A zero magnitude would make the division 0/0 (NaN); treat it as
  // "no similarity" instead.
  if (magnitude === 0) {
    return 0;
  }

  return dotProduct / magnitude;
}
| 59 | + |
/**
 * Build the text used for search from a plant document.
 *
 * Reads the 'Common Name' and 'Scientific Name' fields, drops whichever
 * are missing or otherwise falsy, and joins the rest with a single space.
 *
 * @param {Object} plant - Plant document
 * @returns {string} - Combined searchable text ('' when neither field is set)
 */
function createSearchableText(plant) {
  const commonName = plant['Common Name'];
  const scientificName = plant['Scientific Name'];

  return [commonName, scientificName].filter(Boolean).join(' ');
}
| 73 | + |
| 74 | +module.exports = { |
| 75 | + generateEmbedding, |
| 76 | + cosineSimilarity, |
| 77 | + createSearchableText, |
| 78 | + getEmbeddingModel, |
| 79 | +}; |
| 80 | + |
| 81 | + |
| 82 | + |
0 commit comments