diff --git a/litellm/llms/base_llm/ocr/transformation.py b/litellm/llms/base_llm/ocr/transformation.py index 2fe8f3def757..f1d8e3ce8d28 100644 --- a/litellm/llms/base_llm/ocr/transformation.py +++ b/litellm/llms/base_llm/ocr/transformation.py @@ -21,6 +21,7 @@ class OCRPageDimensions(LiteLLMPydanticObjectBase): """Page dimensions from OCR response.""" + dpi: Optional[int] = None height: Optional[int] = None width: Optional[int] = None @@ -28,27 +29,30 @@ class OCRPageDimensions(LiteLLMPydanticObjectBase): class OCRPageImage(LiteLLMPydanticObjectBase): """Image extracted from OCR page.""" + image_base64: Optional[str] = None bbox: Optional[Dict[str, Any]] = None - + model_config = {"extra": "allow"} class OCRPage(LiteLLMPydanticObjectBase): """Single page from OCR response.""" + index: int markdown: str images: Optional[List[OCRPageImage]] = None dimensions: Optional[OCRPageDimensions] = None - + model_config = {"extra": "allow"} class OCRUsageInfo(LiteLLMPydanticObjectBase): """Usage information from OCR response.""" + pages_processed: Optional[int] = None doc_size_bytes: Optional[int] = None - + model_config = {"extra": "allow"} @@ -57,12 +61,13 @@ class OCRResponse(LiteLLMPydanticObjectBase): Standard OCR response format. Standardized to Mistral OCR format - other providers should transform to this format. """ + pages: List[OCRPage] model: str document_annotation: Optional[Any] = None usage_info: Optional[OCRUsageInfo] = None object: str = "ocr" - + model_config = {"extra": "allow"} # Define private attributes using PrivateAttr @@ -71,6 +76,7 @@ class OCRResponse(LiteLLMPydanticObjectBase): class OCRRequestData(LiteLLMPydanticObjectBase): """OCR request data structure.""" + data: Optional[Union[Dict, bytes]] = None files: Optional[Dict[str, Any]] = None @@ -82,6 +88,7 @@ class BaseOCRConfig: """ def __init__(self) -> None: + # No initialization required pass def get_supported_ocr_params(self, model: str) -> list: @@ -138,17 +145,19 @@ def transform_ocr_request( """ Transform OCR request to provider-specific format. Override in provider-specific implementations. - + Args: model: Model name document: Document to process (Mistral format dict, or file path, bytes, etc.) optional_params: Optional parameters for the request headers: Request headers - + Returns: OCRRequestData with data and files fields """ - raise NotImplementedError("transform_ocr_request must be implemented by provider") + raise NotImplementedError( + "transform_ocr_request must be implemented by provider" + ) async def async_transform_ocr_request( self, @@ -162,24 +171,25 @@ async def async_transform_ocr_request( Async transform OCR request to provider-specific format. Optional method - providers can override if they need async transformations (e.g., Azure AI for URL-to-base64 conversion). - + Default implementation falls back to sync transform_ocr_request. - + Args: model: Model name document: Document to process (Mistral format dict, or file path, bytes, etc.) optional_params: Optional parameters for the request headers: Request headers - + Returns: OCRRequestData with data and files fields """ # Default implementation: call sync version + # Optimize call by using positional arguments return self.transform_ocr_request( - model=model, - document=document, - optional_params=optional_params, - headers=headers, + model, + document, + optional_params, + headers, **kwargs, ) @@ -194,7 +204,9 @@ def transform_ocr_response( Transform provider-specific OCR response to standard format. Override in provider-specific implementations. """ - raise NotImplementedError("transform_ocr_response must be implemented by provider") + raise NotImplementedError( + "transform_ocr_response must be implemented by provider" + ) def get_error_class( self, @@ -208,4 +220,3 @@ def get_error_class( message=error_message, headers=headers, ) -