diff --git a/.github/workflows/CI_license_compliance.yml b/.github/workflows/CI_license_compliance.yml index 5cc46dd718..f6b7351abb 100644 --- a/.github/workflows/CI_license_compliance.yml +++ b/.github/workflows/CI_license_compliance.yml @@ -13,13 +13,14 @@ on: env: CORE_DATADOG_API_KEY: ${{ secrets.CORE_DATADOG_API_KEY }} PYTHON_VERSION: "3.10" - EXCLUDE_PACKAGES: "(?i)^(azure-identity|fastembed|ragas|tqdm|psycopg).*" + EXCLUDE_PACKAGES: "(?i)^(azure-identity|fastembed|ragas|tqdm|psycopg|mistralai).*" # Exclusions must be explicitly motivated # # - azure-identity is MIT but the license is not available on PyPI # - fastembed is Apache 2.0 but the license on PyPI is unclear ("Other/Proprietary License (Apache License)") # - ragas is Apache 2.0 but the license is not available on PyPI + # - mistralai is Apache 2.0 but the license is not available on PyPI # - tqdm is MLP but there are no better alternatives # - psycopg is LGPL-3.0 but FOSSA is fine with it diff --git a/README.md b/README.md index 00d4515b52..999b66fc93 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ Please check out our [Contribution Guidelines](CONTRIBUTING.md) for all the deta | [llama-stack-haystack](integrations/llama_stack/) | Generator | [![PyPI - Version](https://img.shields.io/pypi/v/llama-stack-haystack.svg?color=orange)](https://pypi.org/project/llama-stack-haystack) | [![Test / llama-stack](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/llama_stack.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/llama_stack.yml) | | [mcp-haystack](integrations/mcp/) | Tool | [![PyPI - Version](https://img.shields.io/pypi/v/mcp-haystack.svg?color=orange)](https://pypi.org/project/mcp-haystack) | [![Test / mcp](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/mcp.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/mcp.yml) | | [meta-llama-haystack](integrations/meta_llama/) | Generator | [![PyPI - Version](https://img.shields.io/pypi/v/meta-llama-haystack.svg?color=orange)](https://pypi.org/project/meta-llama-haystack) | [![Test / meta_llama](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/meta_llama.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/meta_llama.yml) | -| [mistral-haystack](integrations/mistral/) | Embedder, Generator | [![PyPI - Version](https://img.shields.io/pypi/v/mistral-haystack.svg)](https://pypi.org/project/mistral-haystack) | [![Test / mistral](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/mistral.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/mistral.yml) | +| [mistral-haystack](integrations/mistral/) | Converter, Embedder, Generator | [![PyPI - Version](https://img.shields.io/pypi/v/mistral-haystack.svg)](https://pypi.org/project/mistral-haystack) | [![Test / mistral](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/mistral.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/mistral.yml) | | [mongodb-atlas-haystack](integrations/mongodb_atlas/) | Document Store | [![PyPI - Version](https://img.shields.io/pypi/v/mongodb-atlas-haystack.svg?color=orange)](https://pypi.org/project/mongodb-atlas-haystack) | [![Test / 
mongodb-atlas](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/mongodb_atlas.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/mongodb_atlas.yml) | | [nvidia-haystack](integrations/nvidia/) | Embedder, Generator, Ranker | [![PyPI - Version](https://img.shields.io/pypi/v/nvidia-haystack.svg?color=orange)](https://pypi.org/project/nvidia-haystack) | [![Test / nvidia](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/nvidia.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/nvidia.yml) | | [ollama-haystack](integrations/ollama/) | Embedder, Generator | [![PyPI - Version](https://img.shields.io/pypi/v/ollama-haystack.svg?color=orange)](https://pypi.org/project/ollama-haystack) | [![Test / ollama](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/ollama.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/ollama.yml) | diff --git a/integrations/mistral/examples/indexing_ocr_pipeline.py b/integrations/mistral/examples/indexing_ocr_pipeline.py new file mode 100644 index 0000000000..0258231b78 --- /dev/null +++ b/integrations/mistral/examples/indexing_ocr_pipeline.py @@ -0,0 +1,87 @@ +# To run this example, you will need to: +# 1. Set a `MISTRAL_API_KEY` environment variable +# 2. Place a PDF file named `sample.pdf` in the same directory as this script +# +# This example demonstrates OCR document processing with structured annotations, +# embedding the extracted documents using Mistral embeddings, and storing them +# in an InMemoryDocumentStore for later retrieval. +# +# You can customize the ImageAnnotation and DocumentAnnotation schemas below +# to extract different structured information from your documents. 
+ +from typing import List + +from haystack import Pipeline +from haystack.components.writers import DocumentWriter +from haystack.document_stores.in_memory import InMemoryDocumentStore +from mistralai.models import DocumentURLChunk +from pydantic import BaseModel, Field + +from haystack_integrations.components.converters.mistral.ocr_document_converter import ( + MistralOCRDocumentConverter, +) +from haystack_integrations.components.embedders.mistral.document_embedder import ( + MistralDocumentEmbedder, +) + + +# Define schema for structured image annotations (bbox) +class ImageAnnotation(BaseModel): + image_type: str = Field(..., description="The type of image content") + description: str = Field(..., description="Brief description of the image") + + +# Define schema for structured document annotations +class DocumentAnnotation(BaseModel): + language: str = Field(..., description="Primary language of the document") + urls: List[str] = Field(..., description="URLs found in the document") + topics: List[str] = Field(..., description="Main topics covered in the document") + + +# Initialize document store +document_store = InMemoryDocumentStore() + +# Create indexing pipeline +indexing_pipeline = Pipeline() + +# Add components to the pipeline +indexing_pipeline.add_component( + "converter", + MistralOCRDocumentConverter(pages=[0, 1]), +) +indexing_pipeline.add_component( + "embedder", + MistralDocumentEmbedder(), +) +indexing_pipeline.add_component( + "writer", + DocumentWriter(document_store=document_store), +) + +# Connect components +indexing_pipeline.connect("converter.documents", "embedder.documents") +indexing_pipeline.connect("embedder.documents", "writer.documents") + +# Prepare sources: URL and local file +sources = [ + DocumentURLChunk(document_url="https://arxiv.org/pdf/1706.03762"), + "./sample.pdf", # Local PDF file +] + +# Run the pipeline with annotation schemas +result = indexing_pipeline.run( + { + "converter": { + "sources": sources, + "bbox_annotation_schema": ImageAnnotation, + "document_annotation_schema": DocumentAnnotation, + } + } +) + + +# Check out documents processed by OCR. 
+# Content is optionally enriched inline with image annotations (from the bbox schema) and metadata with semantic fields (from the document schema)
+documents = document_store.filter_documents()
+# Check out the raw Mistral API response for unprocessed data and usage info
+raw_mistral_response = result["converter"]["raw_mistral_response"]
diff --git a/integrations/mistral/pydoc/config.yml b/integrations/mistral/pydoc/config.yml
index c26843a549..7ac3b8b999 100644
--- a/integrations/mistral/pydoc/config.yml
+++ b/integrations/mistral/pydoc/config.yml
@@ -5,6 +5,7 @@ loaders:
         "haystack_integrations.components.embedders.mistral.document_embedder",
         "haystack_integrations.components.embedders.mistral.text_embedder",
         "haystack_integrations.components.generators.mistral.chat.chat_generator",
+        "haystack_integrations.components.converters.mistral.ocr_document_converter",
       ]
     ignore_when_discovered: ["__init__"]
 processors:
diff --git a/integrations/mistral/pydoc/config_docusaurus.yml b/integrations/mistral/pydoc/config_docusaurus.yml
index 9340803a2c..275c911e8e 100644
--- a/integrations/mistral/pydoc/config_docusaurus.yml
+++ b/integrations/mistral/pydoc/config_docusaurus.yml
@@ -5,6 +5,7 @@ loaders:
       - haystack_integrations.components.embedders.mistral.document_embedder
       - haystack_integrations.components.embedders.mistral.text_embedder
       - haystack_integrations.components.generators.mistral.chat.chat_generator
+      - haystack_integrations.components.converters.mistral.ocr_document_converter
     search_path:
       - ../src
     type: haystack_pydoc_tools.loaders.CustomPythonLoader
diff --git a/integrations/mistral/pyproject.toml b/integrations/mistral/pyproject.toml
index bcc53c9d3e..15be36f58d 100644
--- a/integrations/mistral/pyproject.toml
+++ b/integrations/mistral/pyproject.toml
@@ -23,7 +23,7 @@ classifiers = [
   "Programming Language :: Python :: Implementation :: CPython",
   "Programming Language :: Python :: Implementation :: PyPy",
 ]
-dependencies = ["haystack-ai>=2.19.0"]
+dependencies = ["haystack-ai>=2.19.0", "mistralai>=1.9.11"]
 
 [project.urls]
 Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/mistral#readme"
@@ -58,7 +58,7 @@ dependencies = [
   "pytest-rerunfailures",
   "mypy",
   "pip",
-  "pytz"
+  "pytz",
 ]
 
 [tool.hatch.envs.test.scripts]
@@ -68,7 +68,8 @@ all = 'pytest {args:tests}'
 cov-retry = 'all --cov=haystack_integrations --reruns 3 --reruns-delay 30 -x'
 types = """mypy -p haystack_integrations.components.embedders.mistral \
--p haystack_integrations.components.generators.mistral {args}"""
+-p haystack_integrations.components.generators.mistral \
+-p haystack_integrations.components.converters {args}"""
 
 [tool.mypy]
 install_types = true
diff --git a/integrations/mistral/src/haystack_integrations/components/converters/mistral/__init__.py b/integrations/mistral/src/haystack_integrations/components/converters/mistral/__init__.py
new file mode 100644
index 0000000000..d1e037a7d3
--- /dev/null
+++ b/integrations/mistral/src/haystack_integrations/components/converters/mistral/__init__.py
@@ -0,0 +1,3 @@
+from .ocr_document_converter import MistralOCRDocumentConverter
+
+__all__ = ["MistralOCRDocumentConverter"]
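The indexing example above stops once documents are written to the store. For completeness, here is a minimal query-side sketch (not part of this diff) showing how the OCR-indexed documents could be retrieved. It assumes `MISTRAL_API_KEY` is set, that `document_store` is the `InMemoryDocumentStore` populated by the indexing pipeline, and the query text is arbitrary:

```python
# Hypothetical companion to examples/indexing_ocr_pipeline.py: embed a query
# with the matching Mistral text embedder and retrieve OCR-indexed documents.
from haystack import Pipeline
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever

from haystack_integrations.components.embedders.mistral.text_embedder import (
    MistralTextEmbedder,
)

query_pipeline = Pipeline()
query_pipeline.add_component("text_embedder", MistralTextEmbedder())
query_pipeline.add_component(
    "retriever", InMemoryEmbeddingRetriever(document_store=document_store)
)
query_pipeline.connect("text_embedder.embedding", "retriever.query_embedding")

result = query_pipeline.run({"text_embedder": {"text": "What is attention?"}})
for doc in result["retriever"]["documents"]:
    # Metadata set by the converter (e.g., source_page_count) travels with each hit
    print(doc.score, doc.meta.get("source_page_count"))
```

Using `MistralTextEmbedder` for queries keeps query vectors in the same embedding space as the `MistralDocumentEmbedder` output used at indexing time.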
diff --git a/integrations/mistral/src/haystack_integrations/components/converters/mistral/ocr_document_converter.py b/integrations/mistral/src/haystack_integrations/components/converters/mistral/ocr_document_converter.py
new file mode 100644
index 0000000000..47c79fb91a
--- /dev/null
+++ b/integrations/mistral/src/haystack_integrations/components/converters/mistral/ocr_document_converter.py
@@ -0,0 +1,412 @@
+import json
+import re
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Type, Union
+
+from haystack import Document, component, default_from_dict, default_to_dict, logging
+from haystack.components.converters.utils import get_bytestream_from_source
+from haystack.dataclasses import ByteStream
+from haystack.utils import Secret, deserialize_secrets_inplace
+from mistralai import Mistral
+from mistralai.extra import response_format_from_pydantic_model
+from mistralai.models import (
+    DocumentURLChunk,
+    FileChunk,
+    ImageURLChunk,
+    OCRResponse,
+)
+from pydantic import BaseModel
+
+logger = logging.getLogger(__name__)
+
+
+@component
+class MistralOCRDocumentConverter:
+    """
+    This component extracts text from documents using Mistral's OCR API, with optional structured
+    annotations for both individual image regions (bounding boxes) and full documents.
+
+    Accepts document sources in various formats (str/Path for local files, ByteStream for in-memory data,
+    DocumentURLChunk for document URLs, ImageURLChunk for image URLs, or FileChunk for Mistral file IDs)
+    and retrieves the recognized text via Mistral's OCR service. Local files are automatically uploaded
+    to Mistral's storage.
+    Returns Haystack Documents (one per source) containing all pages concatenated with form feed characters (\\f),
+    ensuring compatibility with Haystack's DocumentSplitter for accurate page-wise splitting and overlap handling.
+
+    **How Annotations Work:**
+    When annotation schemas (`bbox_annotation_schema` or `document_annotation_schema`) are provided,
+    the OCR model first extracts text and structure from the document. Then, a Vision LLM is called
+    to analyze the content and generate structured annotations according to your defined schemas.
+    For more details, see: https://docs.mistral.ai/capabilities/document_ai/annotations/#how-it-works
+
+    **Usage Example:**
+    ```python
+    from haystack.utils import Secret
+    from haystack_integrations.components.converters.mistral import MistralOCRDocumentConverter
+    from mistralai.models import DocumentURLChunk, ImageURLChunk, FileChunk
+
+    converter = MistralOCRDocumentConverter(
+        api_key=Secret.from_env_var("MISTRAL_API_KEY"),
+        model="mistral-ocr-2505"
+    )
+
+    # Process multiple sources
+    sources = [
+        DocumentURLChunk(document_url="https://example.com/document.pdf"),
+        ImageURLChunk(image_url="https://example.com/receipt.jpg"),
+        FileChunk(file_id="file-abc123"),
+    ]
+    result = converter.run(sources=sources)
+
+    documents = result["documents"]  # List of 3 Documents
+    raw_responses = result["raw_mistral_response"]  # List of 3 raw responses
+    ```
+
+    **Structured Output Example:**
+    ```python
+    from typing import List
+
+    from pydantic import BaseModel, Field
+    from mistralai.models import DocumentURLChunk
+
+    from haystack_integrations.components.converters.mistral import MistralOCRDocumentConverter
+
+    # Define schema for structured image annotations
+    class ImageAnnotation(BaseModel):
+        image_type: str = Field(..., description="The type of image content")
+        short_description: str = Field(..., description="Short natural-language description")
+        summary: str = Field(..., description="Detailed summary of the image content")
+
+    # Define schema for structured document annotations
+    class DocumentAnnotation(BaseModel):
+        language: str = Field(..., description="Primary language of the document")
+        chapter_titles: List[str] = Field(..., description="Detected chapter or section titles")
+        urls: List[str] = Field(..., description="URLs found in the text")
+
+    converter = MistralOCRDocumentConverter(
+        model="mistral-ocr-2505",
+    )
+
+    sources =
[DocumentURLChunk(document_url="https://example.com/report.pdf")] + result = converter.run( + sources=sources, + bbox_annotation_schema=ImageAnnotation, + document_annotation_schema=DocumentAnnotation, + ) + + documents = result["documents"] + raw_responses = result["raw_mistral_response"] + ``` + """ + + def __init__( + self, + api_key: Secret = Secret.from_env_var("MISTRAL_API_KEY"), + model: str = "mistral-ocr-2505", + include_image_base64: bool = False, + pages: Optional[List[int]] = None, + image_limit: Optional[int] = None, + image_min_size: Optional[int] = None, + cleanup_uploaded_files: bool = True, + ): + """ + Creates a MistralOCRDocumentConverter component. + + :param api_key: + The Mistral API key. Defaults to the MISTRAL_API_KEY environment variable. + :param model: + The OCR model to use. Default is "mistral-ocr-2505". + See more: https://docs.mistral.ai/getting-started/models/models_overview/ + :param include_image_base64: + If True, includes base64 encoded images in the response. + This may significantly increase response size and processing time. + :param pages: + Specific page numbers to process (0-indexed). If None, processes all pages. + :param image_limit: + Maximum number of images to extract from the document. + :param image_min_size: + Minimum height and width (in pixels) for images to be extracted. + :param cleanup_uploaded_files: + If True, automatically deletes files uploaded to Mistral after processing. + Only affects files uploaded from local sources (str, Path, ByteStream). + Files provided as FileChunk are not deleted. Default is True. + """ + self.api_key = api_key + self.model = model + self.include_image_base64 = include_image_base64 + self.pages = pages + self.image_limit = image_limit + self.image_min_size = image_min_size + self.cleanup_uploaded_files = cleanup_uploaded_files + + # Initialize Mistral client + self.client = Mistral(api_key=self.api_key.resolve_value()) + + def to_dict(self) -> Dict[str, Any]: + """ + Serializes the component to a dictionary. + + :returns: + Dictionary with serialized data. + """ + return default_to_dict( + self, + api_key=self.api_key.to_dict(), + model=self.model, + include_image_base64=self.include_image_base64, + pages=self.pages, + image_limit=self.image_limit, + image_min_size=self.image_min_size, + cleanup_uploaded_files=self.cleanup_uploaded_files, + ) + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "MistralOCRDocumentConverter": + """ + Deserializes the component from a dictionary. + + :param data: + Dictionary to deserialize from. + :returns: + Deserialized component. + """ + deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"]) + return default_from_dict(cls, data) + + @component.output_types(documents=List[Document], raw_mistral_response=List[Dict[str, Any]]) + def run( + self, + sources: List[Union[str, Path, ByteStream, DocumentURLChunk, FileChunk, ImageURLChunk]], + bbox_annotation_schema: Optional[Type[BaseModel]] = None, + document_annotation_schema: Optional[Type[BaseModel]] = None, + ) -> Dict[str, Any]: + """ + Extract text from documents using Mistral OCR. + + :param sources: + List of document sources to process. Each source can be one of: + - str: File path to a local document + - Path: Path object to a local document + - ByteStream: Haystack ByteStream object containing document data + - DocumentURLChunk: Mistral chunk for document URLs (signed or public URLs to PDFs, etc.) 
+ - ImageURLChunk: Mistral chunk for image URLs (signed or public URLs to images) + - FileChunk: Mistral chunk for file IDs (files previously uploaded to Mistral) + :param bbox_annotation_schema: + Optional Pydantic model for structured annotations per bounding box. + When provided, a Vision LLM analyzes each image region and returns structured data. + :param document_annotation_schema: + Optional Pydantic model for structured annotations for the full document. + When provided, a Vision LLM analyzes the entire document and returns structured data. + Note: Document annotation is limited to a maximum of 8 pages. Documents exceeding + this limit will not be processed for document annotation. + + :returns: + A dictionary with the following keys: + - `documents`: List of Haystack Documents (one per source). Each Document has the following structure: + - `content`: All pages joined with form feed (\\f) separators in markdown format. + When using bbox_annotation_schema, image tags will be enriched with your defined descriptions. + - `meta`: Aggregated metadata dictionary with structure: + {"source_page_count": int, "source_total_images": int, "source_*": any}. + If document_annotation_schema was provided, all annotation fields are unpacked + with 'source_' prefix (e.g., source_language, source_chapter_titles, source_urls). + - `raw_mistral_response`: + List of dictionaries containing raw OCR responses from Mistral API (one per source). + Each response includes per-page details, images, annotations, and usage info. + """ + # Convert Pydantic models to Mistral ResponseFormat schemas + bbox_annotation_format = ( + response_format_from_pydantic_model(bbox_annotation_schema) if bbox_annotation_schema else None + ) + document_annotation_format = ( + response_format_from_pydantic_model(document_annotation_schema) if document_annotation_schema else None + ) + + # Process each source + documents = [] + raw_responses = [] + uploaded_file_ids = [] + + for source in sources: + document, raw_response, uploaded_file_id = self._process_single_source( + source, + bbox_annotation_format, + document_annotation_format, + document_annotation_schema, + ) + + # Add results if processing succeeded + if document is not None and raw_response is not None: + documents.append(document) + raw_responses.append(raw_response) + + # Track uploaded file for cleanup even if processing failed + if uploaded_file_id: + uploaded_file_ids.append(uploaded_file_id) + + # Cleanup uploaded files + self._cleanup_uploaded_files(uploaded_file_ids) + + return {"documents": documents, "raw_mistral_response": raw_responses} + + def _process_single_source( + self, + source: Union[str, Path, ByteStream, DocumentURLChunk, FileChunk, ImageURLChunk], + bbox_annotation_format: Optional[Any], + document_annotation_format: Optional[Any], + document_annotation_schema: Optional[Type[BaseModel]], + ) -> tuple[Optional[Document], Optional[Dict[str, Any]], Optional[str]]: + """ + Process a single source and return the document, raw response, and file_id if uploaded. + + :param source: + The source to process. + :param bbox_annotation_format: + Optional response format for bounding box annotations. + :param document_annotation_format: + Optional response format for document annotations. + :param document_annotation_schema: + Optional Pydantic model for document-level annotations. + + :returns: + A tuple of (Document|None, raw_response_dict|None, uploaded_file_id|None). + Returns (None, None, uploaded_file_id) if processing fails but file was uploaded. 
+ """ + uploaded_file_id = None + try: + chunk = self._convert_source_to_chunk(source) + + # Track if we uploaded this file + if isinstance(source, (str, Path, ByteStream)) and isinstance(chunk, FileChunk): + uploaded_file_id = chunk.file_id + + ocr_response: OCRResponse = self.client.ocr.process( + model=self.model, + document=chunk, + include_image_base64=self.include_image_base64, + pages=self.pages, + image_limit=self.image_limit, + image_min_size=self.image_min_size, + bbox_annotation_format=bbox_annotation_format, + document_annotation_format=document_annotation_format, + ) + + document = self._process_ocr_response(ocr_response, document_annotation_schema) + return (document, ocr_response.model_dump(), uploaded_file_id) + except Exception as e: + logger.warning( + "Could not process source {source}. Skipping it. Error: {error}", + source=source, + error=e, + ) + return (None, None, uploaded_file_id) + + def _cleanup_uploaded_files(self, file_ids: List[str]) -> None: + """ + Delete uploaded files from Mistral storage. + + :param file_ids: + List of file IDs to delete. + """ + if not self.cleanup_uploaded_files or not file_ids: + return + + for file_id in file_ids: + try: + self.client.files.delete(file_id=file_id) + except Exception as e: + logger.warning( + "Failed to delete uploaded file {file_id}. Error: {error}", + file_id=file_id, + error=e, + ) + + def _convert_source_to_chunk( + self, + source: Union[str, Path, ByteStream, DocumentURLChunk, FileChunk, ImageURLChunk], + ) -> Union[DocumentURLChunk, FileChunk, ImageURLChunk]: + """ + Convert various source types to Mistral-compatible chunk format. + + Local sources (str, Path, ByteStream) are uploaded to Mistral's storage and returned + as FileChunk. Remote sources (DocumentURLChunk, ImageURLChunk, FileChunk) are returned as-is. + + :param source: + The source to convert. Can be a file path (str/Path), ByteStream, or Mistral chunk type. + + :returns: + A Mistral chunk type (DocumentURLChunk, FileChunk, or ImageURLChunk). + """ + # If already a Mistral chunk type, return as-is + if isinstance(source, (DocumentURLChunk, FileChunk, ImageURLChunk)): + return source + + # Convert str/Path/ByteStream to ByteStream + bytestream = get_bytestream_from_source(source=source) + + # Upload file to Mistral and get file ID + uploaded_file = self.client.files.upload( + file={ + "file_name": bytestream.meta.get("file_path", "document"), + "content": bytestream.data, + }, + purpose="ocr", + ) + + # Return FileChunk with the uploaded file ID + return FileChunk(file_id=uploaded_file.id) + + def _process_ocr_response( + self, + ocr_response: OCRResponse, + document_annotation_schema: Optional[Type[BaseModel]], + ) -> Document: + """ + Convert an OCR response from Mistral API into a single Haystack Document. + + :param ocr_response: + The OCR response object from Mistral API. + :param document_annotation_schema: + Optional Pydantic model for document-level annotations. + + :returns: + A single Haystack Document containing the processed OCR content. 
+ """ + # Convert OCR pages to a single Haystack Document + # We add "\f" separators between pages to differentiate them and make them usable across other components + page_contents = [] + total_images = 0 + + for page in ocr_response.pages: + # Enrich markdown content with structured image annotations inline + enriched_content = page.markdown + for img in page.images: + if img.image_annotation: + # Regex pattern to find ![img-id](img-id) and insert annotation after it + pattern = f"!\\[{re.escape(img.id)}\\]\\({re.escape(img.id)}\\)" + replacement = f"![{img.id}]({img.id})\n\n**Image Annotation:** {img.image_annotation}\n" + enriched_content = re.sub(pattern, replacement, enriched_content) + + page_contents.append(enriched_content) + total_images += len(page.images) + + # Join all pages with form feed character (\f) as separator + all_content = "\f".join(page_contents) + + # Parse and filter document-level annotations to schema-defined fields + try: + parsed = json.loads(ocr_response.document_annotation or "{}") + if document_annotation_schema: + allowed = document_annotation_schema.model_fields.keys() + parsed = {k: v for k, v in parsed.items() if k in allowed} + doc_annotation_meta = {f"source_{k}": v for k, v in parsed.items()} + except Exception: + doc_annotation_meta = {} + + # Create a single Document with aggregated metadata + document = Document( + content=all_content, + meta={ + "source_page_count": len(ocr_response.pages), + "source_total_images": total_images, + # Unpack document annotation + **doc_annotation_meta, + }, + ) + + return document diff --git a/integrations/mistral/src/haystack_integrations/components/converters/py.typed b/integrations/mistral/src/haystack_integrations/components/converters/py.typed new file mode 100644 index 0000000000..e69de29bb2 diff --git a/integrations/mistral/tests/test_ocr_document_converter.py b/integrations/mistral/tests/test_ocr_document_converter.py new file mode 100644 index 0000000000..9ca001f732 --- /dev/null +++ b/integrations/mistral/tests/test_ocr_document_converter.py @@ -0,0 +1,608 @@ +# SPDX-FileCopyrightText: 2023-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 +import os +from typing import List +from unittest.mock import MagicMock, patch + +import pytest +from haystack import Document +from haystack.dataclasses import ByteStream +from haystack.utils import Secret +from mistralai.models import DocumentURLChunk, FileChunk, ImageURLChunk +from pydantic import BaseModel, Field + +from haystack_integrations.components.converters.mistral import ( + MistralOCRDocumentConverter, +) + + +class TestMistralOCRDocumentConverter: + CLASS_TYPE = ( + "haystack_integrations.components.converters.mistral.ocr_document_converter.MistralOCRDocumentConverter" + ) + + def test_init_default(self, monkeypatch): + monkeypatch.setenv("MISTRAL_API_KEY", "test-api-key") + converter = MistralOCRDocumentConverter() + + assert converter.api_key == Secret.from_env_var("MISTRAL_API_KEY") + assert converter.model == "mistral-ocr-2505" + assert converter.include_image_base64 is False + assert converter.pages is None + assert converter.image_limit is None + assert converter.image_min_size is None + + def test_init_with_all_optional_parameters(self): + converter = MistralOCRDocumentConverter( + api_key=Secret.from_token("test-api-key"), + model="mistral-ocr-custom", + include_image_base64=True, + pages=[0, 1, 2], + image_limit=10, + image_min_size=100, + ) + + assert converter.api_key == Secret.from_token("test-api-key") + assert converter.model == 
"mistral-ocr-custom" + assert converter.include_image_base64 is True + assert converter.pages == [0, 1, 2] + assert converter.image_limit == 10 + assert converter.image_min_size == 100 + + def test_to_dict(self, monkeypatch): + monkeypatch.setenv("MISTRAL_API_KEY", "test-api-key") + converter = MistralOCRDocumentConverter() + converter_dict = converter.to_dict() + + assert converter_dict == { + "type": self.CLASS_TYPE, + "init_parameters": { + "api_key": { + "env_vars": ["MISTRAL_API_KEY"], + "strict": True, + "type": "env_var", + }, + "model": "mistral-ocr-2505", + "include_image_base64": False, + "pages": None, + "image_limit": None, + "image_min_size": None, + "cleanup_uploaded_files": True, + }, + } + + def test_to_dict_with_custom_parameters(self, monkeypatch): + monkeypatch.setenv("ENV_VAR", "test-api-key") + converter = MistralOCRDocumentConverter( + api_key=Secret.from_env_var("ENV_VAR", strict=False), + model="mistral-ocr-custom", + include_image_base64=True, + pages=[0, 1, 2], + image_limit=10, + image_min_size=100, + cleanup_uploaded_files=False, + ) + converter_dict = converter.to_dict() + + assert converter_dict == { + "type": self.CLASS_TYPE, + "init_parameters": { + "api_key": { + "type": "env_var", + "env_vars": ["ENV_VAR"], + "strict": False, + }, + "model": "mistral-ocr-custom", + "include_image_base64": True, + "pages": [0, 1, 2], + "image_limit": 10, + "image_min_size": 100, + "cleanup_uploaded_files": False, + }, + } + + def test_from_dict(self, monkeypatch): + monkeypatch.setenv("MISTRAL_API_KEY", "test-api-key") + converter_dict = { + "type": self.CLASS_TYPE, + "init_parameters": { + "api_key": { + "env_vars": ["MISTRAL_API_KEY"], + "strict": True, + "type": "env_var", + }, + "model": "mistral-ocr-2505", + "include_image_base64": False, + "pages": None, + "image_limit": None, + "image_min_size": None, + "cleanup_uploaded_files": True, + }, + } + + converter = MistralOCRDocumentConverter.from_dict(converter_dict) + + assert converter.model == "mistral-ocr-2505" + assert converter.include_image_base64 is False + assert converter.pages is None + assert converter.image_limit is None + assert converter.image_min_size is None + assert converter.cleanup_uploaded_files is True + + def test_from_dict_with_custom_parameters(self, monkeypatch): + monkeypatch.setenv("MISTRAL_API_KEY", "test-api-key") + converter_dict = { + "type": self.CLASS_TYPE, + "init_parameters": { + "api_key": { + "env_vars": ["MISTRAL_API_KEY"], + "strict": True, + "type": "env_var", + }, + "model": "mistral-ocr-custom", + "include_image_base64": True, + "pages": [0, 1, 2], + "image_limit": 10, + "image_min_size": 100, + "cleanup_uploaded_files": False, + }, + } + + converter = MistralOCRDocumentConverter.from_dict(converter_dict) + + assert converter.model == "mistral-ocr-custom" + assert converter.include_image_base64 is True + assert converter.pages == [0, 1, 2] + assert converter.image_limit == 10 + assert converter.image_min_size == 100 + assert converter.cleanup_uploaded_files is False + + @pytest.fixture + def mock_ocr_response(self): + """Create a mock OCR response""" + mock_page = MagicMock() + mock_page.markdown = "# Sample Document\n\nThis is page 1." 
+ mock_page.images = [] + + mock_response = MagicMock() + mock_response.pages = [mock_page] + mock_response.document_annotation = None + mock_response.model_dump.return_value = { + "pages": [{"markdown": "# Sample Document\n\nThis is page 1.", "images": []}], + "document_annotation": None, + } + return mock_response + + @pytest.fixture + def mock_ocr_response_with_multiple_pages(self): + """Create a mock OCR response with multiple pages""" + mock_page1 = MagicMock() + mock_page1.markdown = "# Page 1" + mock_page1.images = [] + + mock_page2 = MagicMock() + mock_page2.markdown = "# Page 2" + mock_page2.images = [] + + mock_response = MagicMock() + mock_response.pages = [mock_page1, mock_page2] + mock_response.document_annotation = None + mock_response.model_dump.return_value = { + "pages": [ + {"markdown": "# Page 1", "images": []}, + {"markdown": "# Page 2", "images": []}, + ], + "document_annotation": None, + } + return mock_response + + @pytest.mark.parametrize( + "source", + [ + DocumentURLChunk(document_url="https://example.com/doc.pdf"), + FileChunk(file_id="file-123"), + ImageURLChunk(image_url="https://example.com/image.jpg"), + ], + ids=["document_url_chunk", "file_chunk", "image_url_chunk"], + ) + def test_run_with_remote_chunk_types(self, mock_ocr_response, source): + """Test processing with remote chunk types (DocumentURLChunk, FileChunk, ImageURLChunk)""" + converter = MistralOCRDocumentConverter(api_key=Secret.from_token("test-api-key")) + + with patch.object(converter.client.ocr, "process", return_value=mock_ocr_response): + result = converter.run(sources=[source]) + + assert len(result["documents"]) == 1 + assert isinstance(result["documents"][0], Document) + assert result["documents"][0].content == "# Sample Document\n\nThis is page 1." 
+        # Spot-check metadata once; the structure is identical for all chunk types
+        if isinstance(source, DocumentURLChunk):
+            assert result["documents"][0].meta["source_page_count"] == 1
+            assert result["documents"][0].meta["source_total_images"] == 0
+
+    @pytest.mark.parametrize(
+        "source_type",
+        ["file_path_str", "path_object", "bytestream"],
+    )
+    def test_run_with_local_sources(self, mock_ocr_response, tmp_path, source_type):
+        """Test processing with local source types (str, Path, ByteStream)"""
+        converter = MistralOCRDocumentConverter(api_key=Secret.from_token("test-api-key"))
+
+        # Create temporary file if needed
+        if source_type in ["file_path_str", "path_object"]:
+            test_file = tmp_path / "test.pdf"
+            test_file.write_bytes(b"fake pdf content")
+
+        # Create the source based on type
+        if source_type == "file_path_str":
+            source = str(test_file)
+        elif source_type == "path_object":
+            source = test_file
+        else:  # bytestream
+            source = ByteStream(data=b"fake pdf content", meta={"file_path": "test.pdf"})
+
+        mock_uploaded_file = MagicMock()
+        mock_uploaded_file.id = "uploaded-file-123"
+
+        with patch.object(converter.client.files, "upload", return_value=mock_uploaded_file):
+            with patch.object(converter.client.ocr, "process", return_value=mock_ocr_response):
+                with patch.object(converter.client.files, "delete"):
+                    result = converter.run(sources=[source])
+
+        assert len(result["documents"]) == 1
+        assert isinstance(result["documents"][0], Document)
+        # Spot-check the upload call once; every local source type goes through it
+        if source_type == "file_path_str":
+            converter.client.files.upload.assert_called_once()
+
+    def test_run_with_multiple_sources(self, mock_ocr_response, tmp_path):
+        """Test processing with multiple mixed source types"""
+        converter = MistralOCRDocumentConverter(api_key=Secret.from_token("test-api-key"))
+
+        # Create a temporary file
+        test_file = tmp_path / "test.pdf"
+        test_file.write_bytes(b"fake pdf content")
+
+        mock_uploaded_file = MagicMock()
+        mock_uploaded_file.id = "uploaded-file-123"
+
+        with patch.object(converter.client.files, "upload", return_value=mock_uploaded_file):
+            with patch.object(converter.client.ocr, "process", return_value=mock_ocr_response):
+                with patch.object(converter.client.files, "delete"):
+                    sources = [
+                        DocumentURLChunk(document_url="https://example.com/doc.pdf"),
+                        FileChunk(file_id="file-123"),
+                        str(test_file),
+                    ]
+                    result = converter.run(sources=sources)
+
+        assert len(result["documents"]) == 3
+        assert all(isinstance(doc, Document) for doc in result["documents"])
+
+    def test_run_with_bbox_annotations(self):
+        """Test processing with bbox annotation schema"""
+        converter = MistralOCRDocumentConverter(api_key=Secret.from_token("test-api-key"))
+
+        # Define annotation schema
+        class ImageAnnotation(BaseModel):
+            image_type: str = Field(..., description="Type of image")
+
+        # Create mock response with image annotation
+        mock_image = MagicMock()
+        mock_image.id = "img-1"
+        mock_image.image_annotation = '{"image_type": "diagram"}'
+
+        mock_page = MagicMock()
+        mock_page.markdown = "# Document\n\n![img-1](img-1)"
+        mock_page.images = [mock_image]
+
+        mock_response = MagicMock()
+        mock_response.pages = [mock_page]
+        mock_response.document_annotation = None
+        mock_response.model_dump.return_value = {
+            "pages": [],
+            "document_annotation": None,
+        }
+
+        with patch.object(converter.client.ocr, "process", return_value=mock_response):
+            sources = [DocumentURLChunk(document_url="https://example.com/doc.pdf")]
+            result = converter.run(sources=sources, bbox_annotation_schema=ImageAnnotation)
+
assert len(result["documents"]) == 1 + # Check that image annotation was enriched in content + assert "Image Annotation:" in result["documents"][0].content + + def test_run_with_document_annotations(self): + """Test processing with document annotation schema""" + converter = MistralOCRDocumentConverter(api_key=Secret.from_token("test-api-key")) + + # Define annotation schema + class DocumentAnnotation(BaseModel): + language: str = Field(..., description="Document language") + topics: List[str] = Field(..., description="Main topics") + + # Create mock response with document annotation + mock_page = MagicMock() + mock_page.markdown = "# Document" + mock_page.images = [] + + mock_response = MagicMock() + mock_response.pages = [mock_page] + mock_response.document_annotation = '{"language": "en", "topics": ["AI", "ML"]}' + mock_response.model_dump.return_value = { + "pages": [], + "document_annotation": '{"language": "en", "topics": ["AI", "ML"]}', + } + + with patch.object(converter.client.ocr, "process", return_value=mock_response): + sources = [DocumentURLChunk(document_url="https://example.com/doc.pdf")] + result = converter.run(sources=sources, document_annotation_schema=DocumentAnnotation) + + assert len(result["documents"]) == 1 + # Check that document annotations are in metadata + assert result["documents"][0].meta["source_language"] == "en" + assert result["documents"][0].meta["source_topics"] == ["AI", "ML"] + + def test_run_with_both_annotations(self): + """Test processing with both bbox and document annotation schemas""" + converter = MistralOCRDocumentConverter(api_key=Secret.from_token("test-api-key")) + + class ImageAnnotation(BaseModel): + image_type: str = Field(..., description="Type of image") + + class DocumentAnnotation(BaseModel): + language: str = Field(..., description="Document language") + + # Create mock response + mock_image = MagicMock() + mock_image.id = "img-1" + mock_image.image_annotation = '{"image_type": "chart"}' + + mock_page = MagicMock() + mock_page.markdown = "![img-1](img-1)" + mock_page.images = [mock_image] + + mock_response = MagicMock() + mock_response.pages = [mock_page] + mock_response.document_annotation = '{"language": "en"}' + mock_response.model_dump.return_value = { + "pages": [], + "document_annotation": '{"language": "en"}', + } + + with patch.object(converter.client.ocr, "process", return_value=mock_response): + sources = [DocumentURLChunk(document_url="https://example.com/doc.pdf")] + result = converter.run( + sources=sources, + bbox_annotation_schema=ImageAnnotation, + document_annotation_schema=DocumentAnnotation, + ) + + assert len(result["documents"]) == 1 + assert "Image Annotation:" in result["documents"][0].content + assert result["documents"][0].meta["source_language"] == "en" + + def test_run_with_pages_parameter(self, mock_ocr_response): + """Test that pages parameter is passed to API""" + converter = MistralOCRDocumentConverter(api_key=Secret.from_token("test-api-key"), pages=[0, 1]) + + with patch.object(converter.client.ocr, "process", return_value=mock_ocr_response) as mock_process: + sources = [DocumentURLChunk(document_url="https://example.com/doc.pdf")] + result = converter.run(sources=sources) + + # Verify pages parameter was passed + call_args = mock_process.call_args + assert call_args.kwargs["pages"] == [0, 1] + assert len(result["documents"]) == 1 + + def test_run_handles_api_error(self, mock_ocr_response): + """Test error handling when API fails""" + converter = 
MistralOCRDocumentConverter(api_key=Secret.from_token("test-api-key")) + + with patch.object(converter.client.ocr, "process") as mock_process: + # First call succeeds, second fails, third succeeds + mock_process.side_effect = [ + mock_ocr_response, + Exception("API Error"), + mock_ocr_response, + ] + + sources = [ + DocumentURLChunk(document_url="https://example.com/doc1.pdf"), + DocumentURLChunk(document_url="https://example.com/doc2.pdf"), + DocumentURLChunk(document_url="https://example.com/doc3.pdf"), + ] + result = converter.run(sources=sources) + + # Should only return 2 documents (failed source skipped) + assert len(result["documents"]) == 2 + assert len(result["raw_mistral_response"]) == 2 + + def test_process_ocr_response_multiple_pages(self, mock_ocr_response_with_multiple_pages): + """Test multi-page document with form feed separator""" + converter = MistralOCRDocumentConverter(api_key=Secret.from_token("test-api-key")) + + document = converter._process_ocr_response( + mock_ocr_response_with_multiple_pages, document_annotation_schema=None + ) + + assert isinstance(document, Document) + # Pages should be separated by \f + assert document.content == "# Page 1\f# Page 2" + assert "\f" in document.content + assert document.meta["source_page_count"] == 2 + + def test_process_ocr_response_with_images(self): + """Test metadata extraction with images""" + converter = MistralOCRDocumentConverter(api_key=Secret.from_token("test-api-key")) + + # Create mock response with images + mock_image1 = MagicMock() + mock_image1.id = "img-1" + mock_image1.image_annotation = None + + mock_image2 = MagicMock() + mock_image2.id = "img-2" + mock_image2.image_annotation = None + + mock_page = MagicMock() + mock_page.markdown = "# Document with images" + mock_page.images = [mock_image1, mock_image2] + + mock_response = MagicMock() + mock_response.pages = [mock_page] + mock_response.document_annotation = None + + document = converter._process_ocr_response(mock_response, document_annotation_schema=None) + + assert document.meta["source_page_count"] == 1 + assert document.meta["source_total_images"] == 2 + + def test_run_with_cleanup_disabled(self, mock_ocr_response, tmp_path): + """Test that files are not deleted when cleanup_uploaded_files=False""" + converter = MistralOCRDocumentConverter(api_key=Secret.from_token("test-api-key"), cleanup_uploaded_files=False) + + # Create a temporary file + test_file = tmp_path / "test.pdf" + test_file.write_bytes(b"fake pdf content") + + mock_uploaded_file = MagicMock() + mock_uploaded_file.id = "uploaded-file-123" + + with patch.object(converter.client.files, "upload", return_value=mock_uploaded_file): + with patch.object(converter.client.ocr, "process", return_value=mock_ocr_response): + with patch.object(converter.client.files, "delete") as mock_delete: + sources = [str(test_file)] + result = converter.run(sources=sources) + + # Verify file was uploaded but NOT deleted + assert len(result["documents"]) == 1 + converter.client.files.upload.assert_called_once() + mock_delete.assert_not_called() + + def test_run_cleanup_happens_on_ocr_failure(self, tmp_path): + """Test that cleanup happens even when OCR processing fails""" + converter = MistralOCRDocumentConverter(api_key=Secret.from_token("test-api-key"), cleanup_uploaded_files=True) + + # Create a temporary file + test_file = tmp_path / "test.pdf" + test_file.write_bytes(b"fake pdf content") + + mock_uploaded_file = MagicMock() + mock_uploaded_file.id = "uploaded-file-123" + + with patch.object(converter.client.files, 
"upload", return_value=mock_uploaded_file): + with patch.object(converter.client.ocr, "process", side_effect=Exception("OCR failed")): + with patch.object(converter.client.files, "delete") as mock_delete: + sources = [str(test_file)] + result = converter.run(sources=sources) + + # Verify no documents returned due to failure + assert len(result["documents"]) == 0 + # But file should still be deleted + mock_delete.assert_called_once_with(file_id="uploaded-file-123") + + def test_run_cleanup_failure_does_not_break_flow(self, mock_ocr_response, tmp_path): + """Test that cleanup failures don't break the main flow""" + converter = MistralOCRDocumentConverter(api_key=Secret.from_token("test-api-key"), cleanup_uploaded_files=True) + + # Create a temporary file + test_file = tmp_path / "test.pdf" + test_file.write_bytes(b"fake pdf content") + + mock_uploaded_file = MagicMock() + mock_uploaded_file.id = "uploaded-file-123" + + with patch.object(converter.client.files, "upload", return_value=mock_uploaded_file): + with patch.object(converter.client.ocr, "process", return_value=mock_ocr_response): + with patch.object( + converter.client.files, + "delete", + side_effect=Exception("Delete failed"), + ): + sources = [str(test_file)] + # Should not raise an exception + result = converter.run(sources=sources) + + # Verify document was still processed successfully + assert len(result["documents"]) == 1 + assert isinstance(result["documents"][0], Document) + + def test_run_mixed_sources_only_uploaded_files_deleted(self, mock_ocr_response, tmp_path): + """Test that only uploaded files are deleted, not user-provided chunks""" + converter = MistralOCRDocumentConverter(api_key=Secret.from_token("test-api-key"), cleanup_uploaded_files=True) + + # Create a temporary file + test_file = tmp_path / "test.pdf" + test_file.write_bytes(b"fake pdf content") + + mock_uploaded_file = MagicMock() + mock_uploaded_file.id = "uploaded-file-123" + + with patch.object(converter.client.files, "upload", return_value=mock_uploaded_file): + with patch.object(converter.client.ocr, "process", return_value=mock_ocr_response): + with patch.object(converter.client.files, "delete") as mock_delete: + sources = [ + str(test_file), # This will be uploaded + FileChunk(file_id="user-file-123"), # User-provided + DocumentURLChunk(document_url="https://example.com/doc.pdf"), # URL + ] + result = converter.run(sources=sources) + + # Verify all sources processed + assert len(result["documents"]) == 3 + # Only the uploaded file should be deleted + mock_delete.assert_called_once_with(file_id="uploaded-file-123") + + @pytest.mark.skipif( + not os.environ.get("MISTRAL_API_KEY"), + reason="Export an env var called MISTRAL_API_KEY containing the Mistral API key to run this test.", + ) + @pytest.mark.integration + def test_integration_run_with_document_url(self): + """Integration test with real API call using arxiv PDF""" + converter = MistralOCRDocumentConverter() + + sources = [DocumentURLChunk(document_url="https://arxiv.org/pdf/1706.03762")] + result = converter.run(sources=sources) + + assert len(result["documents"]) == 1 + assert isinstance(result["documents"][0], Document) + assert len(result["documents"][0].content) > 0 + assert result["documents"][0].meta["source_page_count"] > 0 + assert "raw_mistral_response" in result + assert len(result["raw_mistral_response"]) == 1 + + @pytest.mark.skipif( + not os.environ.get("MISTRAL_API_KEY"), + reason="Export an env var called MISTRAL_API_KEY containing the Mistral API key to run this test.", + ) + 
@pytest.mark.integration + def test_integration_run_with_annotations(self): + """Integration test with real API call using annotation schemas""" + converter = MistralOCRDocumentConverter(pages=[0]) # Only process first page for speed + + # Define simple annotation schemas + class ImageAnnotation(BaseModel): + image_type: str = Field( + ..., + description="The type of image content (e.g., diagram, chart, photo)", + ) + + class DocumentAnnotation(BaseModel): + language: str = Field(..., description="The primary language of the document") + + sources = [DocumentURLChunk(document_url="https://arxiv.org/pdf/1706.03762")] + result = converter.run( + sources=sources, + bbox_annotation_schema=ImageAnnotation, + document_annotation_schema=DocumentAnnotation, + ) + + assert len(result["documents"]) == 1 + doc = result["documents"][0] + assert isinstance(doc, Document) + assert len(doc.content) > 0 + # Check if document annotation was added to metadata + assert "source_language" in doc.meta
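The converter's docstring notes that pages are joined with form feed characters for DocumentSplitter compatibility. A minimal sketch of that downstream interaction (not part of this diff; assumes `MISTRAL_API_KEY` is set):

```python
# Split the single OCR Document back into one Document per page:
# DocumentSplitter's split_by="page" cuts exactly on the "\f" separators
# that MistralOCRDocumentConverter inserts between pages.
from haystack.components.preprocessors import DocumentSplitter
from mistralai.models import DocumentURLChunk

from haystack_integrations.components.converters.mistral import (
    MistralOCRDocumentConverter,
)

converter = MistralOCRDocumentConverter()
result = converter.run(
    sources=[DocumentURLChunk(document_url="https://arxiv.org/pdf/1706.03762")]
)

splitter = DocumentSplitter(split_by="page", split_length=1)
splitter.warm_up()
split_docs = splitter.run(documents=result["documents"])["documents"]

# One Document per page, with the page index recorded by the splitter
print(len(split_docs), split_docs[0].meta["page_number"])
```

Running the splitter in a Pipeline would call `warm_up()` automatically; it is invoked explicitly here only because the sketch uses the component standalone.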