Skip to content

Commit 8e06c7d

Browse files
committed
improvement(mistral-OCR): error handling
1 parent 852945b commit 8e06c7d

File tree

2 files changed

+64
-11
lines changed

2 files changed

+64
-11
lines changed

sim/blocks/blocks/mistral-parse.ts

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,10 @@ import { MistralIcon } from '@/components/icons'
44

55
export const MistralParseBlock: BlockConfig<MistralParserOutput> = {
66
type: 'mistral_parse',
7-
name: 'Mistral PDF Parser',
7+
name: 'Mistral Parser',
88
description: 'Extract text from PDF documents',
99
longDescription:
10-
'Extract text and structure from PDF documents using Mistral\'s OCR API. Enter a URL to a PDF document, configure processing options, and get the content in your preferred format.',
10+
'Extract text and structure from PDF documents using Mistral\'s OCR API. Enter a URL to a PDF document (.pdf extension required), configure processing options, and get the content in your preferred format. The URL must be publicly accessible and point to a valid PDF file. Note: Google Drive, Dropbox, and other cloud storage links are not supported; use a direct download URL from a web server instead.',
1111
category: 'tools',
1212
bgColor: '#000000',
1313
icon: MistralIcon,
@@ -95,6 +95,24 @@ export const MistralParseBlock: BlockConfig<MistralParserOutput> = {
9595
if (!['http:', 'https:'].includes(validatedUrl.protocol)) {
9696
throw new Error(`URL must use HTTP or HTTPS protocol. Found: ${validatedUrl.protocol}`);
9797
}
98+
99+
// Check for PDF extension and provide specific guidance
100+
const pathname = validatedUrl.pathname.toLowerCase();
101+
if (!pathname.endsWith('.pdf')) {
102+
if (!pathname.includes('pdf')) {
103+
throw new Error(
104+
'The URL does not appear to point to a PDF document. ' +
105+
'Please provide a URL that ends with .pdf extension. ' +
106+
'If your document is not a PDF, please convert it to PDF format first.'
107+
);
108+
} else {
109+
// PDF is in the name but not at the end, so give a warning but proceed
110+
console.warn(
111+
'Warning: URL contains "pdf" but does not end with .pdf extension. ' +
112+
'This might still work if the server returns a valid PDF document.'
113+
);
114+
}
115+
}
98116
} catch (error) {
99117
const errorMessage = error instanceof Error ? error.message : String(error);
100118
throw new Error(`Invalid URL format: ${errorMessage}`);

sim/tools/mistral/parser.ts

Lines changed: 44 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -173,14 +173,33 @@ export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutp
173173
throw new Error(`Invalid protocol: ${url.protocol}. URL must use HTTP or HTTPS protocol`);
174174
}
175175

176-
// Validate file appears to be a PDF (loose check)
177-
const pathname = url.pathname.toLowerCase();
178-
if (!pathname.endsWith('.pdf') && !pathname.includes('pdf')) {
179-
console.warn(
180-
'Warning: URL does not appear to be a PDF document. ' +
181-
'If this is incorrect, the document may still be processed if it is a valid PDF.'
176+
// Validate against known unsupported services
177+
if (url.hostname.includes('drive.google.com') || url.hostname.includes('docs.google.com')) {
178+
throw new Error(
179+
'Google Drive links are not supported by the Mistral OCR API. ' +
180+
'Please upload your PDF to a public web server or provide a direct download link ' +
181+
'that ends with .pdf extension.'
182182
);
183183
}
184+
185+
// Validate file appears to be a PDF (stricter check with informative warning)
186+
const pathname = url.pathname.toLowerCase();
187+
if (!pathname.endsWith('.pdf')) {
188+
// Check if PDF is included in the path at all
189+
if (!pathname.includes('pdf')) {
190+
console.warn(
191+
'Warning: URL does not appear to point to a PDF document. ' +
192+
'The Mistral OCR API is designed to work with PDF files. ' +
193+
'Please ensure your URL points to a valid PDF document (ideally ending with .pdf extension).'
194+
);
195+
} else {
196+
// If "pdf" is in the URL but not at the end, give a different warning
197+
console.warn(
198+
'Warning: URL contains "pdf" but does not end with .pdf extension. ' +
199+
'This might still work if the server returns a valid PDF document despite the missing extension.'
200+
);
201+
}
202+
}
184203
} catch (error) {
185204
const errorMessage = error instanceof Error ? error.message : String(error);
186205
throw new Error(
@@ -417,6 +436,12 @@ export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutp
417436
// Get base error message
418437
const errorMsg = getErrorMessage(error);
419438

439+
// Handle null reference errors which often occur with invalid PDF URLs
440+
if (errorMsg.includes('Cannot read properties of null') ||
441+
(errorMsg.includes('null') && errorMsg.includes('length'))) {
442+
return 'Mistral OCR Error: Invalid PDF document URL. The URL provided either does not point to a valid PDF file or the PDF cannot be accessed. Please ensure you provide a direct link to a publicly accessible PDF file with .pdf extension.';
443+
}
444+
420445
// Handle common API error status codes
421446
if (typeof error === 'object' && error !== null) {
422447
const status = error.status || (error.response && error.response.status);
@@ -434,7 +459,7 @@ export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutp
434459
case 413:
435460
return 'Mistral OCR Error: The PDF document is too large for processing.';
436461
case 415:
437-
return 'Mistral OCR Error: Unsupported file format. Please ensure the URL points to a valid PDF document.';
462+
return 'Mistral OCR Error: Unsupported file format. Please ensure the URL points to a valid PDF document with a .pdf extension.';
438463
case 429:
439464
return 'Mistral OCR Error: Rate limit exceeded. Please try again later.';
440465
case 500:
@@ -448,7 +473,7 @@ export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutp
448473

449474
// Handle common network and URL errors
450475
if (errorMsg.includes('URL') || errorMsg.includes('protocol') || errorMsg.includes('http')) {
451-
return 'Mistral OCR Error: Invalid PDF URL format. Please provide a complete URL starting with https:// to your PDF document.';
476+
return 'Mistral OCR Error: Invalid PDF URL format. Please provide a complete URL starting with https:// to your PDF document (e.g., https://example.com/document.pdf).';
452477
}
453478

454479
if (errorMsg.includes('ETIMEDOUT') || errorMsg.includes('timeout') || errorMsg.includes('ECONNABORTED')) {
@@ -463,7 +488,17 @@ export const mistralParserTool: ToolConfig<MistralParserInput, MistralParserOutp
463488
return 'Mistral OCR Error: Failed to parse the response from the OCR service.';
464489
}
465490

491+
// PDF-specific error handling
492+
if (errorMsg.toLowerCase().includes('pdf')) {
493+
if (errorMsg.toLowerCase().includes('invalid') || errorMsg.toLowerCase().includes('corrupted')) {
494+
return 'Mistral OCR Error: The document appears to be an invalid or corrupted PDF. Please check that the URL points to a valid, properly formatted PDF document.';
495+
}
496+
if (errorMsg.toLowerCase().includes('password') || errorMsg.toLowerCase().includes('protected') || errorMsg.toLowerCase().includes('encrypted')) {
497+
return 'Mistral OCR Error: The PDF document appears to be password-protected or encrypted. The OCR service cannot process protected documents.';
498+
}
499+
}
500+
466501
// Default error message with the original error for context
467-
return `Mistral OCR Error: ${errorMsg}`;
502+
return `Mistral OCR Error: Invalid PDF document or URL. Please ensure you provide a direct link to a valid PDF file. Technical details: ${errorMsg}`;
468503
},
469504
}

0 commit comments

Comments
 (0)