1
+ import fs from 'fs'
1
2
import { NextFunction , Request , Response , Router } from 'express'
2
3
import { EMBED_DIM } from '../../config'
3
- import { createChunkIndex , deleteChunkIndex , getRagFileChunks } from '../services/rag/chunkDb'
4
4
import { RagFile , RagIndex } from '../db/models'
5
5
import { RequestWithUser } from '../types'
6
6
import z from 'zod'
7
7
import { queryRagIndex } from '../services/rag/query'
8
- import { ingestionPipeline } from '../services/rag/ingestion/pipeline'
9
8
import multer from 'multer'
10
9
import { mkdir , rm , stat } from 'fs/promises'
11
- import { getOllamaOpenAIClient } from '../util/ollama '
10
+ import { getAzureOpenAIClient } from '../util/azure/client '
12
11
13
12
// Express router exposing the RAG index management endpoints (mounted by the parent app).
const router = Router ( )
14
13
@@ -23,16 +22,20 @@ router.post('/indices', async (req, res) => {
23
22
const { user } = req as RequestWithUser
24
23
const { name, dim } = IndexCreationSchema . parse ( req . body )
25
24
25
+ const client = getAzureOpenAIClient ( 'curredev4omini' )
26
+ const vectorStore = await client . vectorStores . create ( {
27
+ name,
28
+ } )
29
+
26
30
const ragIndex = await RagIndex . create ( {
27
31
userId : user . id ,
28
32
metadata : {
29
33
name,
30
34
dim,
35
+ azureVectorStoreId : vectorStore . id ,
31
36
} ,
32
37
} )
33
38
34
- await createChunkIndex ( ragIndex )
35
-
36
39
// Create upload directory for this index
37
40
38
41
res . json ( ragIndex )
@@ -51,7 +54,14 @@ router.delete('/indices/:id', async (req, res) => {
51
54
return
52
55
}
53
56
54
- await deleteChunkIndex ( ragIndex )
57
+ const client = getAzureOpenAIClient ( 'curredev4omini' )
58
+ try {
59
+ await client . vectorStores . del ( ragIndex . metadata . azureVectorStoreId )
60
+ } catch ( error ) {
61
+ console . error ( `Failed to delete Azure vector store ${ ragIndex . metadata . azureVectorStoreId } :` , error )
62
+ res . status ( 500 ) . json ( { error : 'Failed to delete Azure vector store' } )
63
+ return
64
+ }
55
65
56
66
const uploadPath = `${ UPLOAD_DIR } /${ id } `
57
67
try {
@@ -77,11 +87,14 @@ router.get('/indices', async (_req, res) => {
77
87
] ,
78
88
} )
79
89
90
+ const client = getAzureOpenAIClient ( 'curredev4omini' )
91
+
80
92
// Add ragFileCount to each index
81
93
const indicesWithCount = await Promise . all (
82
94
indices . map ( async ( index : any ) => {
95
+ const vectorStore = await client . vectorStores . retrieve ( index . metadata . azureVectorStoreId )
83
96
const count = await RagFile . count ( { where : { ragIndexId : index . id } } )
84
- return { ...index . toJSON ( ) , ragFileCount : count }
97
+ return { ...index . toJSON ( ) , ragFileCount : count , vectorStore }
85
98
} ) ,
86
99
)
87
100
@@ -107,7 +120,13 @@ router.get('/indices/:id', async (req, res) => {
107
120
return
108
121
}
109
122
110
- res . json ( ragIndex )
123
+ const client = getAzureOpenAIClient ( 'curredev4omini' )
124
+ const vectorStore = await client . vectorStores . retrieve ( ragIndex . metadata . azureVectorStoreId )
125
+
126
+ res . json ( {
127
+ ...ragIndex . toJSON ( ) ,
128
+ vectorStore,
129
+ } )
111
130
} )
112
131
113
132
router . get ( '/indices/:id/files/:fileId' , async ( req , res ) => {
@@ -129,11 +148,20 @@ router.get('/indices/:id/files/:fileId', async (req, res) => {
129
148
return
130
149
}
131
150
132
- const chunks = await getRagFileChunks ( ragFile . ragIndex , ragFile )
151
+ // Read file content
152
+ const filePath = `${ UPLOAD_DIR } /${ indexId } /${ ragFile . filename } `
153
+ let fileContent : string
154
+ try {
155
+ fileContent = await fs . promises . readFile ( filePath , 'utf-8' )
156
+ } catch ( error ) {
157
+ console . error ( `Failed to read file ${ filePath } :` , error )
158
+ res . status ( 500 ) . json ( { error : 'Failed to read file content' } )
159
+ return
160
+ }
133
161
134
162
res . json ( {
135
163
...ragFile . toJSON ( ) ,
136
- chunks ,
164
+ fileContent ,
137
165
} )
138
166
} )
139
167
@@ -160,9 +188,7 @@ const indexUploadDirMiddleware = async (req: Request, _res: Response, next: Next
160
188
const uploadPath = `${ UPLOAD_DIR } /${ id } `
161
189
try {
162
190
await stat ( uploadPath )
163
- await rm ( uploadPath , { recursive : true , force : true } )
164
- console . log ( `Upload directory ${ uploadPath } deleted` )
165
- await mkdir ( uploadPath , { recursive : true } )
191
+ console . log ( `RAG upload dir exists: ${ uploadPath } ` )
166
192
} catch ( error ) {
167
193
console . warn ( `RAG upload dir not found, creating ${ uploadPath } --- ${ error } ` )
168
194
await mkdir ( uploadPath , { recursive : true } )
@@ -187,23 +213,44 @@ router.post('/indices/:id/upload', [indexUploadDirMiddleware, uploadMiddleware],
187
213
return
188
214
}
189
215
190
- await RagFile . bulkCreate (
191
- req . files . map ( ( file ) => ( {
192
- filename : file . originalname ,
193
- fileType : 'pending' ,
194
- fileSize : 0 ,
195
- numChunks : 0 ,
196
- userId : user . id ,
197
- ragIndexId : ragIndex . id ,
198
- pipelineStage : 'pending' ,
199
- } ) ) ,
216
+ const ragFiles : RagFile [ ] = await Promise . all (
217
+ req . files . map ( ( file : Express . Multer . File ) =>
218
+ RagFile . create ( {
219
+ userId : user . id ,
220
+ ragIndexId : ragIndex . id ,
221
+ pipelineStage : 'upload' ,
222
+ filename : file . originalname ,
223
+ fileType : file . mimetype ,
224
+ fileSize : file . size ,
225
+ metadata : { } ,
226
+ } ) ,
227
+ ) ,
200
228
)
201
229
202
- const openAiClient = getOllamaOpenAIClient ( ) // getAzureOpenAIClient(EMBED_MODEL)
230
+ const client = getAzureOpenAIClient ( 'curredev4omini' )
231
+
232
+ const uploadDirPath = `${ UPLOAD_DIR } /${ id } `
203
233
204
- ingestionPipeline ( openAiClient , `uploads/rag/${ id } ` , ragIndex , user )
234
+ const fileStreams = req . files . map ( ( file : Express . Multer . File ) => {
235
+ const filePath = `${ uploadDirPath } /${ file . originalname } `
236
+ return fs . createReadStream ( filePath )
237
+ } )
238
+
239
+ const fileBatchRes = await client . vectorStores . fileBatches . uploadAndPoll ( ragIndex . metadata . azureVectorStoreId , {
240
+ files : fileStreams ,
241
+ } )
242
+
243
+ console . log ( 'File batch upload response:' , fileBatchRes )
244
+
245
+ await Promise . all (
246
+ ragFiles . map ( async ( ragFile ) =>
247
+ ragFile . update ( {
248
+ pipelineStage : 'completed' ,
249
+ } ) ,
250
+ ) ,
251
+ )
205
252
206
- res . json ( { message : 'Files uploaded successfully, starting ingestion ' } )
253
+ res . json ( { message : 'Files uploaded successfully' } )
207
254
} )
208
255
209
256
const RagIndexQuerySchema = z . object ( {
0 commit comments