@@ -56,7 +56,9 @@ export async function processDocument(
5656 mimeType : string ,
5757 chunkSize = 1000 ,
5858 chunkOverlap = 200 ,
59- minChunkSize = 1
59+ minChunkSize = 1 ,
60+ userId ?: string ,
61+ workspaceId ?: string | null
6062) : Promise < {
6163 chunks : Chunk [ ]
6264 metadata : {
@@ -73,7 +75,7 @@ export async function processDocument(
7375 logger . info ( `Processing document: ${ filename } ` )
7476
7577 try {
76- const parseResult = await parseDocument ( fileUrl , filename , mimeType )
78+ const parseResult = await parseDocument ( fileUrl , filename , mimeType , userId , workspaceId )
7779 const { content, processingMethod } = parseResult
7880 const cloudUrl = 'cloudUrl' in parseResult ? parseResult . cloudUrl : undefined
7981
@@ -131,7 +133,9 @@ export async function processDocument(
131133async function parseDocument (
132134 fileUrl : string ,
133135 filename : string ,
134- mimeType : string
136+ mimeType : string ,
137+ userId ?: string ,
138+ workspaceId ?: string | null
135139) : Promise < {
136140 content : string
137141 processingMethod : 'file-parser' | 'mistral-ocr'
@@ -146,20 +150,26 @@ async function parseDocument(
146150 if ( isPDF && ( hasAzureMistralOCR || hasMistralOCR ) ) {
147151 if ( hasAzureMistralOCR ) {
148152 logger . info ( `Using Azure Mistral OCR: ${ filename } ` )
149- return parseWithAzureMistralOCR ( fileUrl , filename , mimeType )
153+ return parseWithAzureMistralOCR ( fileUrl , filename , mimeType , userId , workspaceId )
150154 }
151155
152156 if ( hasMistralOCR ) {
153157 logger . info ( `Using Mistral OCR: ${ filename } ` )
154- return parseWithMistralOCR ( fileUrl , filename , mimeType )
158+ return parseWithMistralOCR ( fileUrl , filename , mimeType , userId , workspaceId )
155159 }
156160 }
157161
158162 logger . info ( `Using file parser: ${ filename } ` )
159163 return parseWithFileParser ( fileUrl , filename , mimeType )
160164}
161165
162- async function handleFileForOCR ( fileUrl : string , filename : string , mimeType : string ) {
166+ async function handleFileForOCR (
167+ fileUrl : string ,
168+ filename : string ,
169+ mimeType : string ,
170+ userId ?: string ,
171+ workspaceId ?: string | null
172+ ) {
163173 const isExternalHttps = fileUrl . startsWith ( 'https://' ) && ! fileUrl . includes ( '/api/files/serve/' )
164174
165175 if ( isExternalHttps ) {
@@ -175,6 +185,8 @@ async function handleFileForOCR(fileUrl: string, filename: string, mimeType: str
175185 originalName : filename ,
176186 uploadedAt : new Date ( ) . toISOString ( ) ,
177187 purpose : 'knowledge-base' ,
188+ ...( userId && { userId } ) ,
189+ ...( workspaceId && { workspaceId } ) ,
178190 }
179191
180192 const cloudResult = await StorageService . uploadFile ( {
@@ -288,7 +300,13 @@ async function makeOCRRequest(
288300 }
289301}
290302
291- async function parseWithAzureMistralOCR ( fileUrl : string , filename : string , mimeType : string ) {
303+ async function parseWithAzureMistralOCR (
304+ fileUrl : string ,
305+ filename : string ,
306+ mimeType : string ,
307+ userId ?: string ,
308+ workspaceId ?: string | null
309+ ) {
292310 validateOCRConfig (
293311 env . OCR_AZURE_API_KEY ,
294312 env . OCR_AZURE_ENDPOINT ,
@@ -336,12 +354,18 @@ async function parseWithAzureMistralOCR(fileUrl: string, filename: string, mimeT
336354 } )
337355
338356 return env . MISTRAL_API_KEY
339- ? parseWithMistralOCR ( fileUrl , filename , mimeType )
357+ ? parseWithMistralOCR ( fileUrl , filename , mimeType , userId , workspaceId )
340358 : parseWithFileParser ( fileUrl , filename , mimeType )
341359 }
342360}
343361
344- async function parseWithMistralOCR ( fileUrl : string , filename : string , mimeType : string ) {
362+ async function parseWithMistralOCR (
363+ fileUrl : string ,
364+ filename : string ,
365+ mimeType : string ,
366+ userId ?: string ,
367+ workspaceId ?: string | null
368+ ) {
345369 if ( ! env . MISTRAL_API_KEY ) {
346370 throw new Error ( 'Mistral API key required' )
347371 }
@@ -350,7 +374,13 @@ async function parseWithMistralOCR(fileUrl: string, filename: string, mimeType:
350374 throw new Error ( 'Mistral parser tool not configured' )
351375 }
352376
353- const { httpsUrl, cloudUrl } = await handleFileForOCR ( fileUrl , filename , mimeType )
377+ const { httpsUrl, cloudUrl } = await handleFileForOCR (
378+ fileUrl ,
379+ filename ,
380+ mimeType ,
381+ userId ,
382+ workspaceId
383+ )
354384 const params = { filePath : httpsUrl , apiKey : env . MISTRAL_API_KEY , resultType : 'text' as const }
355385
356386 try {
@@ -361,7 +391,9 @@ async function parseWithMistralOCR(fileUrl: string, filename: string, mimeType:
361391 ? mistralParserTool . request ! . url ( params )
362392 : mistralParserTool . request ! . url
363393
364- if ( url . startsWith ( '/' ) ) {
394+ const isInternalRoute = url . startsWith ( '/' )
395+
396+ if ( isInternalRoute ) {
365397 const { getBaseUrl } = await import ( '@/lib/urls/utils' )
366398 url = `${ getBaseUrl ( ) } ${ url } `
367399 }
@@ -371,9 +403,9 @@ async function parseWithMistralOCR(fileUrl: string, filename: string, mimeType:
371403 ? mistralParserTool . request ! . headers ( params )
372404 : mistralParserTool . request ! . headers
373405
374- if ( url . includes ( '/api/tools/mistral/parse' ) ) {
406+ if ( isInternalRoute ) {
375407 const { generateInternalToken } = await import ( '@/lib/auth/internal' )
376- const internalToken = await generateInternalToken ( )
408+ const internalToken = await generateInternalToken ( userId )
377409 headers = {
378410 ...headers ,
379411 Authorization : `Bearer ${ internalToken } ` ,
0 commit comments