@@ -258,7 +258,11 @@ export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutp
258258 throw new Error ( 'Invalid response format from Mistral OCR API' )
259259 }
260260
261- // Set default values and extract from params if available
261+ const mistralData =
262+ ocrResult . output && typeof ocrResult . output === 'object' && ! ocrResult . pages
263+ ? ocrResult . output
264+ : ocrResult
265+
262266 let resultType : 'markdown' | 'text' | 'json' = 'markdown'
263267 let sourceUrl = ''
264268 let isFileUpload = false
@@ -268,50 +272,44 @@ export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutp
268272 sourceUrl = params . filePath . trim ( )
269273 }
270274
271- // Check if this was a file upload
272275 isFileUpload = ! ! params . fileUpload
273276
274277 if ( params . resultType && [ 'markdown' , 'text' , 'json' ] . includes ( params . resultType ) ) {
275278 resultType = params . resultType as 'markdown' | 'text' | 'json'
276279 }
277280 } else if (
278- ocrResult . document &&
279- typeof ocrResult . document === 'object' &&
280- ocrResult . document . document_url &&
281- typeof ocrResult . document . document_url === 'string'
281+ mistralData . document &&
282+ typeof mistralData . document === 'object' &&
283+ mistralData . document . document_url &&
284+ typeof mistralData . document . document_url === 'string'
282285 ) {
283- sourceUrl = ocrResult . document . document_url
286+ sourceUrl = mistralData . document . document_url
284287 }
285288
286- // Process content from pages
287289 let content = ''
288290 const pageCount =
289- ocrResult . pages && Array . isArray ( ocrResult . pages ) ? ocrResult . pages . length : 0
291+ mistralData . pages && Array . isArray ( mistralData . pages ) ? mistralData . pages . length : 0
290292
291293 if ( pageCount > 0 ) {
292- content = ocrResult . pages
294+ content = mistralData . pages
293295 . map ( ( page : any ) => ( page && typeof page . markdown === 'string' ? page . markdown : '' ) )
294296 . filter ( Boolean )
295297 . join ( '\n\n' )
296298 } else {
297299 logger . warn ( 'No pages found in OCR result, returning raw response' )
298- content = JSON . stringify ( ocrResult , null , 2 )
300+ content = JSON . stringify ( mistralData , null , 2 )
299301 }
300302
301- // Process based on requested result type
302303 if ( resultType === 'text' ) {
303- // Strip markdown formatting
304304 content = content
305305 . replace ( / # # * \s / g, '' ) // Remove markdown headers
306306 . replace ( / \* \* / g, '' ) // Remove bold markers
307307 . replace ( / \* / g, '' ) // Remove italic markers
308308 . replace ( / \n { 3 , } / g, '\n\n' ) // Normalize newlines
309309 } else if ( resultType === 'json' ) {
310- // Return the structured data as JSON string
311- content = JSON . stringify ( ocrResult , null , 2 )
310+ content = JSON . stringify ( mistralData , null , 2 )
312311 }
313312
314- // Extract file information with proper validation
315313 let fileName = 'document.pdf'
316314 let fileType = 'pdf'
317315
@@ -333,40 +331,36 @@ export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutp
333331 }
334332 }
335333
336- // Generate a tracking ID with timestamp and random component for uniqueness
337334 const timestamp = Date . now ( )
338335 const randomId = Math . random ( ) . toString ( 36 ) . substring ( 2 , 10 )
339336 const jobId = `mistral-ocr-${ timestamp } -${ randomId } `
340337
341- // Map API response fields to our schema with proper type checking
342338 const usageInfo =
343- ocrResult . usage_info && typeof ocrResult . usage_info === 'object'
339+ mistralData . usage_info && typeof mistralData . usage_info === 'object'
344340 ? {
345341 pagesProcessed :
346- typeof ocrResult . usage_info . pages_processed === 'number'
347- ? ocrResult . usage_info . pages_processed
348- : Number ( ocrResult . usage_info . pages_processed ) ,
342+ typeof mistralData . usage_info . pages_processed === 'number'
343+ ? mistralData . usage_info . pages_processed
344+ : Number ( mistralData . usage_info . pages_processed ) ,
349345 docSizeBytes :
350- typeof ocrResult . usage_info . doc_size_bytes === 'number'
351- ? ocrResult . usage_info . doc_size_bytes
352- : Number ( ocrResult . usage_info . doc_size_bytes ) ,
346+ typeof mistralData . usage_info . doc_size_bytes === 'number'
347+ ? mistralData . usage_info . doc_size_bytes
348+ : Number ( mistralData . usage_info . doc_size_bytes ) ,
353349 }
354350 : undefined
355351
356- // Create metadata object
357352 const metadata : any = {
358353 jobId,
359354 fileType,
360355 fileName,
361356 source : 'url' ,
362357 pageCount,
363358 usageInfo,
364- model : typeof ocrResult . model === 'string' ? ocrResult . model : 'mistral-ocr-latest' ,
359+ model : typeof mistralData . model === 'string' ? mistralData . model : 'mistral-ocr-latest' ,
365360 resultType,
366361 processedAt : new Date ( ) . toISOString ( ) ,
367362 }
368363
369- // Only include sourceUrl for non-file-upload sources or URLs that don't contain our API endpoint
370364 if (
371365 ! isFileUpload &&
372366 sourceUrl &&
@@ -376,7 +370,6 @@ export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutp
376370 metadata . sourceUrl = sourceUrl
377371 }
378372
379- // Return properly structured response
380373 const parserResponse : MistralParserOutput = {
381374 success : true ,
382375 output : {
0 commit comments