Skip to content

Commit f904958

Browse files
committed
improvement(tools): modified return type of mistral parser
1 parent 40a917c commit f904958

File tree

3 files changed

+49
-30
lines changed

3 files changed

+49
-30
lines changed
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
export * from './registry';
1+
export * from './registry'

sim/tools/mistral/parser.ts

Lines changed: 46 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
1+
import { createLogger } from '@/lib/logs/console-logger'
12
import { ToolConfig } from '../types'
23
import { MistralParserInput, MistralParserOutput } from './types'
34

5+
const logger = createLogger('mistral-parser')
6+
47
export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutput> = {
58
id: 'mistral_parser',
69
name: 'Mistral PDF Parser',
@@ -57,7 +60,7 @@ export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutp
5760
url: 'https://api.mistral.ai/v1/ocr',
5861
method: 'POST',
5962
headers: (params) => {
60-
console.log(
63+
logger.info(
6164
'Setting up headers with API key:',
6265
params.apiKey ? `${params.apiKey.substring(0, 5)}...` : 'Missing'
6366
)
@@ -100,7 +103,7 @@ export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutp
100103

101104
// Set the filePath parameter
102105
params.filePath = uploadedFilePath
103-
console.log('Using uploaded file:', uploadedFilePath)
106+
logger.info('Using uploaded file:', uploadedFilePath)
104107
} else {
105108
throw new Error('Invalid file upload: Upload data is missing or invalid')
106109
}
@@ -138,14 +141,14 @@ export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutp
138141
if (!pathname.endsWith('.pdf')) {
139142
// Check if PDF is included in the path at all
140143
if (!pathname.includes('pdf')) {
141-
console.warn(
144+
logger.warn(
142145
'Warning: URL does not appear to point to a PDF document. ' +
143146
'The Mistral OCR API is designed to work with PDF files. ' +
144147
'Please ensure your URL points to a valid PDF document (ideally ending with .pdf extension).'
145148
)
146149
} else {
147150
// If "pdf" is in the URL but not at the end, give a different warning
148-
console.warn(
151+
logger.warn(
149152
'Warning: URL contains "pdf" but does not end with .pdf extension. ' +
150153
'This might still work if the server returns a valid PDF document despite the missing extension.'
151154
)
@@ -172,7 +175,7 @@ export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutp
172175
// Include images (base64)
173176
if (params.includeImageBase64 !== undefined) {
174177
if (typeof params.includeImageBase64 !== 'boolean') {
175-
console.warn('includeImageBase64 parameter should be a boolean, using default (false)')
178+
logger.warn('includeImageBase64 parameter should be a boolean, using default (false)')
176179
} else {
177180
requestBody.include_image_base64 = params.includeImageBase64
178181
}
@@ -190,16 +193,16 @@ export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutp
190193
requestBody.pages = validPages
191194

192195
if (validPages.length !== params.pages.length) {
193-
console.warn(
196+
logger.warn(
194197
`Some invalid page numbers were removed. ` +
195198
`Using ${validPages.length} valid pages: ${validPages.join(', ')}`
196199
)
197200
}
198201
} else {
199-
console.warn('No valid page numbers provided, processing all pages')
202+
logger.warn('No valid page numbers provided, processing all pages')
200203
}
201204
} else if (Array.isArray(params.pages) && params.pages.length === 0) {
202-
console.warn('Empty pages array provided, processing all pages')
205+
logger.warn('Empty pages array provided, processing all pages')
203206
}
204207
}
205208

@@ -209,7 +212,7 @@ export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutp
209212
if (Number.isInteger(imageLimit) && imageLimit > 0) {
210213
requestBody.image_limit = imageLimit
211214
} else {
212-
console.warn('imageLimit must be a positive integer, ignoring this parameter')
215+
logger.warn('imageLimit must be a positive integer, ignoring this parameter')
213216
}
214217
}
215218

@@ -219,12 +222,12 @@ export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutp
219222
if (Number.isInteger(imageMinSize) && imageMinSize > 0) {
220223
requestBody.image_min_size = imageMinSize
221224
} else {
222-
console.warn('imageMinSize must be a positive integer, ignoring this parameter')
225+
logger.warn('imageMinSize must be a positive integer, ignoring this parameter')
223226
}
224227
}
225228

226229
// Log the request (with sensitive data redacted)
227-
console.log('Mistral OCR request:', {
230+
logger.info('Mistral OCR request:', {
228231
url: url.toString(),
229232
hasApiKey: !!params.apiKey,
230233
model: requestBody.model,
@@ -267,12 +270,16 @@ export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutp
267270
// Set default values and extract from params if available
268271
let resultType: 'markdown' | 'text' | 'json' = 'markdown'
269272
let sourceUrl = ''
273+
let isFileUpload = false
270274

271275
if (params && typeof params === 'object') {
272276
if (params.filePath && typeof params.filePath === 'string') {
273277
sourceUrl = params.filePath.trim()
274278
}
275279

280+
// Check if this was a file upload
281+
isFileUpload = !!params.fileUpload
282+
276283
if (params.resultType && ['markdown', 'text', 'json'].includes(params.resultType)) {
277284
resultType = params.resultType as 'markdown' | 'text' | 'json'
278285
}
@@ -296,7 +303,7 @@ export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutp
296303
.filter(Boolean)
297304
.join('\n\n')
298305
} else {
299-
console.warn('No pages found in OCR result, returning raw response')
306+
logger.warn('No pages found in OCR result, returning raw response')
300307
content = JSON.stringify(ocrResult, null, 2)
301308
}
302309

@@ -331,7 +338,7 @@ export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutp
331338
}
332339
}
333340
} catch (urlError) {
334-
console.warn('Failed to parse document URL:', urlError)
341+
logger.warn('Failed to parse document URL:', urlError)
335342
}
336343
}
337344

@@ -355,35 +362,47 @@ export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutp
355362
}
356363
: undefined
357364

365+
// Create metadata object
366+
const metadata: any = {
367+
jobId,
368+
fileType,
369+
fileName,
370+
source: 'url',
371+
pageCount,
372+
usageInfo,
373+
model: typeof ocrResult.model === 'string' ? ocrResult.model : 'mistral-ocr-latest',
374+
resultType,
375+
processedAt: new Date().toISOString(),
376+
}
377+
378+
// Only include sourceUrl when this was not a file upload and the URL contains neither '/api/files/serve/' nor 's3.amazonaws.com'
379+
if (
380+
!isFileUpload &&
381+
sourceUrl &&
382+
!sourceUrl.includes('/api/files/serve/') &&
383+
!sourceUrl.includes('s3.amazonaws.com')
384+
) {
385+
metadata.sourceUrl = sourceUrl
386+
}
387+
358388
// Return properly structured response
359389
const parserResponse: MistralParserOutput = {
360390
success: true,
361391
output: {
362392
content,
363-
metadata: {
364-
jobId,
365-
fileType,
366-
fileName,
367-
source: 'url',
368-
sourceUrl,
369-
pageCount,
370-
usageInfo,
371-
model: typeof ocrResult.model === 'string' ? ocrResult.model : 'mistral-ocr-latest',
372-
resultType,
373-
processedAt: new Date().toISOString(),
374-
},
393+
metadata,
375394
},
376395
}
377396

378397
return parserResponse
379398
} catch (error) {
380-
console.error('Error processing OCR result:', error)
399+
logger.error('Error processing OCR result:', error)
381400
throw error
382401
}
383402
},
384403

385404
transformError: (error) => {
386-
console.error('Mistral OCR processing error:', error)
405+
logger.error('Mistral OCR processing error:', error)
387406

388407
// Helper function to extract message from various error types
389408
const getErrorMessage = (err: any): string => {

sim/tools/mistral/types.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,8 @@ export interface MistralParserMetadata {
5656
/** Source type (always 'url' for now) */
5757
source: 'url'
5858

59-
/** Original URL to the document */
60-
sourceUrl: string
59+
/** Original URL to the document (only included for user-provided URLs) */
60+
sourceUrl?: string
6161

6262
/** Total number of pages in the document */
6363
pageCount: number

0 commit comments

Comments
 (0)