-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgenerate-embeddings.js
More file actions
67 lines (51 loc) · 2.19 KB
/
generate-embeddings.js
File metadata and controls
67 lines (51 loc) · 2.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
require('dotenv').config();
const fs = require('fs').promises;
const OpenAI = require('openai');
const { v4: uuidv4 } = require('uuid');
// Shared OpenAI client for this script. The API key is taken from the
// OPENAI_API_KEY environment variable (dotenv's config() is invoked at the
// top of the file, before this line runs).
const { OPENAI_API_KEY: apiKey } = process.env;
const openai = new OpenAI({ apiKey });
/**
 * Reads questions from `extracted_questions.json`, requests an embedding for
 * each one from the OpenAI embeddings API in batches, and writes the combined
 * records to `questions-with-embeddings.json`.
 *
 * Each input entry is read for its `question` and `message_id` properties.
 * Each output record has the shape
 * `{ question_id, question, message_id, embedding }`, where `question_id` is a
 * freshly generated UUID.
 *
 * A batch whose API call fails is logged and skipped; the remaining batches
 * are still processed. Any other failure (unreadable input, invalid JSON,
 * input that is not an array, failure to write the output) is logged and the
 * process exits with status 1.
 *
 * @returns {Promise<void>} Resolves once the output file has been written.
 */
async function generateEmbeddings() {
  const inputFile = 'extracted_questions.json';
  const outputFile = 'questions-with-embeddings.json';
  const batchSize = 100;
  // Pause between consecutive API requests.
  const delayMs = 500;

  try {
    const fileContent = await fs.readFile(inputFile, 'utf8');
    const questions = JSON.parse(fileContent);

    // Valid JSON is not necessarily an array; fail loudly instead of quietly
    // processing nothing.
    if (!Array.isArray(questions)) {
      throw new Error(`Expected ${inputFile} to contain a JSON array of questions`);
    }

    console.log(`Total questions to process: ${questions.length}`);

    const questionsWithEmbeddings = [];
    let failedBatches = 0;

    for (let i = 0; i < questions.length; i += batchSize) {
      // slice() clamps the end index, so the last batch may be shorter.
      const batch = questions.slice(i, i + batchSize);
      const batchEnd = i + batch.length;

      console.log(`Processing batch ${Math.floor(i / batchSize) + 1} (questions ${i + 1}-${batchEnd})...`);

      try {
        const response = await openai.embeddings.create({
          model: 'text-embedding-3-small',
          input: batch.map((q) => q.question),
        });

        // Embeddings are matched to questions by position, so a response of
        // a different length cannot be paired up reliably.
        // NOTE(review): assumes response.data is in input order - confirm
        // against the API reference.
        if (response.data.length !== batch.length) {
          throw new Error(`expected ${batch.length} embeddings but received ${response.data.length}`);
        }

        // Build the whole batch before appending so that a failure part-way
        // through never leaves a partially recorded batch.
        const batchResults = batch.map((q, j) => ({
          question_id: uuidv4(),
          question: q.question,
          message_id: q.message_id,
          embedding: response.data[j].embedding,
        }));
        questionsWithEmbeddings.push(...batchResults);

        console.log(` Generated embeddings for ${batch.length} questions`);
        console.log(` Collected ${questionsWithEmbeddings.length} questions with embeddings so far`);
      } catch (error) {
        failedBatches += 1;
        console.error(` Error processing batch: ${error.message}`);
      }

      // Throttle between requests; there is nothing to wait for after the
      // final batch.
      if (batchEnd < questions.length) {
        await new Promise((resolve) => setTimeout(resolve, delayMs));
      }
    }

    // Persist the results. Written once at the end so the (potentially large)
    // file is not re-serialised after every batch.
    await fs.writeFile(outputFile, JSON.stringify(questionsWithEmbeddings, null, 2));

    console.log(`\n✓ Embedding generation complete!`);
    console.log(`✓ Total questions with embeddings: ${questionsWithEmbeddings.length}`);
    if (failedBatches > 0) {
      console.warn(`⚠ ${failedBatches} batch(es) failed and were skipped; see errors above`);
    }
    console.log(`✓ Results saved to: ${outputFile}`);
  } catch (error) {
    console.error('Error:', error);
    process.exit(1);
  }
}
// Script entry point. The returned promise is intentionally not awaited:
// generateEmbeddings wraps its whole body in try/catch and reports failures
// itself.
void generateEmbeddings();