1+ import { createLogger } from '@/lib/logs/console-logger'
12import { ToolConfig } from '../types'
23import { MistralParserInput , MistralParserOutput } from './types'
34
5+ const logger = createLogger ( 'mistral-parser' )
6+
47export const mistralParserTool : ToolConfig < MistralParserInput , MistralParserOutput > = {
58 id : 'mistral_parser' ,
69 name : 'Mistral PDF Parser' ,
@@ -57,7 +60,7 @@ export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutp
5760 url : 'https://api.mistral.ai/v1/ocr' ,
5861 method : 'POST' ,
5962 headers : ( params ) => {
60- console . log (
63+ logger . info (
6164 'Setting up headers with API key:' ,
6265 params . apiKey ? `${ params . apiKey . substring ( 0 , 5 ) } ...` : 'Missing'
6366 )
@@ -100,7 +103,7 @@ export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutp
100103
101104 // Set the filePath parameter
102105 params . filePath = uploadedFilePath
103- console . log ( 'Using uploaded file:' , uploadedFilePath )
106+ logger . info ( 'Using uploaded file:' , uploadedFilePath )
104107 } else {
105108 throw new Error ( 'Invalid file upload: Upload data is missing or invalid' )
106109 }
@@ -138,14 +141,14 @@ export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutp
138141 if ( ! pathname . endsWith ( '.pdf' ) ) {
139142 // Check if PDF is included in the path at all
140143 if ( ! pathname . includes ( 'pdf' ) ) {
141- console . warn (
144+ logger . warn (
142145 'Warning: URL does not appear to point to a PDF document. ' +
143146 'The Mistral OCR API is designed to work with PDF files. ' +
144147 'Please ensure your URL points to a valid PDF document (ideally ending with .pdf extension).'
145148 )
146149 } else {
147150 // If "pdf" is in the URL but not at the end, give a different warning
148- console . warn (
151+ logger . warn (
149152 'Warning: URL contains "pdf" but does not end with .pdf extension. ' +
150153 'This might still work if the server returns a valid PDF document despite the missing extension.'
151154 )
@@ -172,7 +175,7 @@ export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutp
172175 // Include images (base64)
173176 if ( params . includeImageBase64 !== undefined ) {
174177 if ( typeof params . includeImageBase64 !== 'boolean' ) {
175- console . warn ( 'includeImageBase64 parameter should be a boolean, using default (false)' )
178+ logger . warn ( 'includeImageBase64 parameter should be a boolean, using default (false)' )
176179 } else {
177180 requestBody . include_image_base64 = params . includeImageBase64
178181 }
@@ -190,16 +193,16 @@ export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutp
190193 requestBody . pages = validPages
191194
192195 if ( validPages . length !== params . pages . length ) {
193- console . warn (
196+ logger . warn (
194197 `Some invalid page numbers were removed. ` +
195198 `Using ${ validPages . length } valid pages: ${ validPages . join ( ', ' ) } `
196199 )
197200 }
198201 } else {
199- console . warn ( 'No valid page numbers provided, processing all pages' )
202+ logger . warn ( 'No valid page numbers provided, processing all pages' )
200203 }
201204 } else if ( Array . isArray ( params . pages ) && params . pages . length === 0 ) {
202- console . warn ( 'Empty pages array provided, processing all pages' )
205+ logger . warn ( 'Empty pages array provided, processing all pages' )
203206 }
204207 }
205208
@@ -209,7 +212,7 @@ export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutp
209212 if ( Number . isInteger ( imageLimit ) && imageLimit > 0 ) {
210213 requestBody . image_limit = imageLimit
211214 } else {
212- console . warn ( 'imageLimit must be a positive integer, ignoring this parameter' )
215+ logger . warn ( 'imageLimit must be a positive integer, ignoring this parameter' )
213216 }
214217 }
215218
@@ -219,12 +222,12 @@ export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutp
219222 if ( Number . isInteger ( imageMinSize ) && imageMinSize > 0 ) {
220223 requestBody . image_min_size = imageMinSize
221224 } else {
222- console . warn ( 'imageMinSize must be a positive integer, ignoring this parameter' )
225+ logger . warn ( 'imageMinSize must be a positive integer, ignoring this parameter' )
223226 }
224227 }
225228
226229 // Log the request (with sensitive data redacted)
227- console . log ( 'Mistral OCR request:' , {
230+ logger . info ( 'Mistral OCR request:' , {
228231 url : url . toString ( ) ,
229232 hasApiKey : ! ! params . apiKey ,
230233 model : requestBody . model ,
@@ -267,12 +270,16 @@ export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutp
267270 // Set default values and extract from params if available
268271 let resultType : 'markdown' | 'text' | 'json' = 'markdown'
269272 let sourceUrl = ''
273+ let isFileUpload = false
270274
271275 if ( params && typeof params === 'object' ) {
272276 if ( params . filePath && typeof params . filePath === 'string' ) {
273277 sourceUrl = params . filePath . trim ( )
274278 }
275279
280+ // Check if this was a file upload
281+ isFileUpload = ! ! params . fileUpload
282+
276283 if ( params . resultType && [ 'markdown' , 'text' , 'json' ] . includes ( params . resultType ) ) {
277284 resultType = params . resultType as 'markdown' | 'text' | 'json'
278285 }
@@ -296,7 +303,7 @@ export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutp
296303 . filter ( Boolean )
297304 . join ( '\n\n' )
298305 } else {
299- console . warn ( 'No pages found in OCR result, returning raw response' )
306+ logger . warn ( 'No pages found in OCR result, returning raw response' )
300307 content = JSON . stringify ( ocrResult , null , 2 )
301308 }
302309
@@ -331,7 +338,7 @@ export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutp
331338 }
332339 }
333340 } catch ( urlError ) {
334- console . warn ( 'Failed to parse document URL:' , urlError )
341+ logger . warn ( 'Failed to parse document URL:' , urlError )
335342 }
336343 }
337344
@@ -355,35 +362,47 @@ export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutp
355362 }
356363 : undefined
357364
365+ // Create metadata object
366+ const metadata : any = {
367+ jobId,
368+ fileType,
369+ fileName,
370+ source : 'url' ,
371+ pageCount,
372+ usageInfo,
373+ model : typeof ocrResult . model === 'string' ? ocrResult . model : 'mistral-ocr-latest' ,
374+ resultType,
375+ processedAt : new Date ( ) . toISOString ( ) ,
376+ }
377+
378+ // Only include sourceUrl when this was not a file upload and the URL is neither our file-serving endpoint nor an S3 URL
379+ if (
380+ ! isFileUpload &&
381+ sourceUrl &&
382+ ! sourceUrl . includes ( '/api/files/serve/' ) &&
383+ ! sourceUrl . includes ( 's3.amazonaws.com' )
384+ ) {
385+ metadata . sourceUrl = sourceUrl
386+ }
387+
358388 // Return properly structured response
359389 const parserResponse : MistralParserOutput = {
360390 success : true ,
361391 output : {
362392 content,
363- metadata : {
364- jobId,
365- fileType,
366- fileName,
367- source : 'url' ,
368- sourceUrl,
369- pageCount,
370- usageInfo,
371- model : typeof ocrResult . model === 'string' ? ocrResult . model : 'mistral-ocr-latest' ,
372- resultType,
373- processedAt : new Date ( ) . toISOString ( ) ,
374- } ,
393+ metadata,
375394 } ,
376395 }
377396
378397 return parserResponse
379398 } catch ( error ) {
380- console . error ( 'Error processing OCR result:' , error )
399+ logger . error ( 'Error processing OCR result:' , error )
381400 throw error
382401 }
383402 } ,
384403
385404 transformError : ( error ) => {
386- console . error ( 'Mistral OCR processing error:' , error )
405+ logger . error ( 'Mistral OCR processing error:' , error )
387406
388407 // Helper function to extract message from various error types
389408 const getErrorMessage = ( err : any ) : string => {
0 commit comments