-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathreindex-file.js
More file actions
108 lines (86 loc) · 2.98 KB
/
reindex-file.js
File metadata and controls
108 lines (86 loc) · 2.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
const { PrismaClient } = require('@prisma/client')
const fetch = require('node-fetch')
// Shared Prisma client used for every database query in this script.
const prisma = new PrismaClient()
// Base URL of the MemMachine service; reindexFile POSTs to `${MEMMACHINE_URL}/api/v2/memories`.
const MEMMACHINE_URL = 'http://localhost:8081'
// Sent as `org_id` in the MemMachine request body.
const ORG_ID = 'personal-assistant'
// Sent as `project_id` in the MemMachine request body.
const PROJECT_ID = 'todd-assistant'
// Builds one MemMachine message per chunk. Both the human-readable label and
// `metadata.chunkIndex` use the chunk's position in `chunks`, not the stored
// `chunkIndex` column.
function buildChunkMessages(file, chunks) {
  return chunks.map((chunk, index) => ({
    content: `[PDF文档: ${file.filename} - 第${index + 1}块]\n${chunk.content}`,
    role: 'user',
    producer: file.userId,
    produced_for: 'assistant',
    metadata: {
      fileId: file.id,
      chunkIndex: index.toString(),
      type: 'pdf_chunk',
      filename: file.filename,
    }
  }))
}

/**
 * Re-sends every stored chunk of one file to MemMachine as a single batch.
 *
 * Looks the file up through Prisma, loads its chunks ordered by `chunkIndex`,
 * wraps each chunk in a message object and POSTs the batch to
 * `${MEMMACHINE_URL}/api/v2/memories`. Progress and failures are logged to the
 * console. HTTP and network failures are caught here and reported through the
 * return value instead of being thrown.
 *
 * @param {string} fileId - Id of the `file` row to reindex.
 * @returns {Promise<boolean>} `true` only when MemMachine answered with a
 *   successful status; `false` when the file does not exist, has no chunks,
 *   or the request failed.
 * @throws Whatever the Prisma queries reject with; those are not caught here.
 */
async function reindexFile(fileId) {
  console.log(`\n🔄 Reindexing file: ${fileId}`)

  // 1. Load the file record.
  const file = await prisma.file.findUnique({
    where: { id: fileId }
  })
  if (!file) {
    console.error('❌ File not found')
    return false
  }
  console.log(`📄 File: ${file.filename}`)
  console.log(` Text length: ${file.extractedText?.length ?? 0}`)

  // 2. Load all chunks of the file in chunk order.
  const chunks = await prisma.fileChunk.findMany({
    where: { fileId: file.id },
    orderBy: { chunkIndex: 'asc' }
  })
  console.log(`📦 Found ${chunks.length} chunks`)
  if (chunks.length === 0) {
    console.log('⚠️ No chunks to index')
    return false
  }

  // 3. Build the MemMachine request.
  const messages = buildChunkMessages(file, chunks)
  const endpoint = `${MEMMACHINE_URL}/api/v2/memories`
  console.log(`\n📤 Sending ${messages.length} memories to MemMachine...`)
  console.log(` URL: ${endpoint}`)
  console.log(` Org: ${ORG_ID}`)
  console.log(` Project: ${PROJECT_ID}`)

  try {
    const response = await fetch(endpoint, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        org_id: ORG_ID,
        project_id: PROJECT_ID,
        messages
      })
    })
    console.log(`\n📡 Response status: ${response.status}`)

    // Read the body as text first: a successful response with an empty or
    // non-JSON body must not be turned into a failure by a JSON parse error.
    const bodyText = await response.text()
    if (!response.ok) {
      console.error('❌ MemMachine error response:')
      console.error(bodyText)
      return false
    }

    let printable = bodyText
    try {
      printable = JSON.stringify(JSON.parse(bodyText), null, 2)
    } catch {
      // Body is not JSON; log it verbatim.
    }
    console.log('✅ Successfully indexed to MemMachine!')
    console.log(' Response:', printable)
    return true
  } catch (error) {
    console.error('❌ Error calling MemMachine:', error.message)
    console.error(' Stack:', error.stack)
    return false
  }
}
// Ids reindexed when reindexAllFiles() is called without arguments.
const DEFAULT_REINDEX_FILE_IDS = [
  'dae294aa-e878-436f-b67b-c30032c84bc9', // 2021.9国庆节放假安全协议书.pdf
  '2717d05d-353d-42e6-96a4-ee851b75b3ad', // 茶青卡和商品标.pdf
  'df1717f9-7283-4288-8a36-1d93bb17a3b0', // 茶青卡和商品标.pdf (duplicate)
  'd84497a7-ddf2-4b99-aedb-54776049f3ad' // 2021.9国庆节放假安全协议书.pdf (duplicate)
]

/**
 * Reindexes a batch of pending files, one after another, then disconnects
 * the shared Prisma client.
 *
 * @param {string[]} [fileIds] - Ids of the files to reindex, processed in
 *   order. Defaults to the built-in list of pending files.
 * @returns {Promise<void>}
 * @throws Re-throws anything `reindexFile` rejects with; files after the
 *   failing one are not processed.
 */
async function reindexAllFiles(fileIds = DEFAULT_REINDEX_FILE_IDS) {
  console.log(`📋 Starting batch reindex of ${fileIds.length} files...\n`)
  try {
    for (const fileId of fileIds) {
      await reindexFile(fileId)
    }
    console.log('\n✅ Batch reindex complete!')
  } finally {
    // Disconnect even when a file fails, so the client connection is not
    // left open after an aborted batch.
    await prisma.$disconnect()
  }
}
// Script entry point: run the batch. On failure, log the error and mark the
// process as failed so the exit status is non-zero instead of 0.
reindexAllFiles().catch((error) => {
  console.error(error)
  process.exitCode = 1
})