diff --git a/marker/services/azure_openai.py b/marker/services/azure_openai.py index b97977383..fd58b9ffb 100644 --- a/marker/services/azure_openai.py +++ b/marker/services/azure_openai.py @@ -102,6 +102,18 @@ def __call__( f"Rate limit error: {e}. Retrying in {wait_time} seconds... (Attempt {tries}/{total_tries})" ) time.sleep(wait_time) + except json.JSONDecodeError as e: + # The response was not valid JSON + if tries == total_tries: + # Last attempt failed. Give up + logger.error( + f"JSONDecodeError: {e}. Max retries reached. Giving up. (Attempt {tries}/{total_tries})", + ) + break + else: + logger.warning( + f"JSONDecodeError: {e}. (Attempt {tries}/{total_tries})", + ) except Exception as e: logger.error(f"Azure OpenAI inference failed: {e}") break diff --git a/marker/services/claude.py b/marker/services/claude.py index 2805f1ce9..67e3cad2e 100644 --- a/marker/services/claude.py +++ b/marker/services/claude.py @@ -7,7 +7,7 @@ import anthropic from anthropic import RateLimitError, APITimeoutError from marker.logger import get_logger -from pydantic import BaseModel +from pydantic import BaseModel, ValidationError from marker.schema.blocks import Block from marker.services import BaseService @@ -17,7 +17,7 @@ class ClaudeService(BaseService): claude_model_name: Annotated[ - str, "The name of the Google model to use for the service." + str, "The name of the Claude model to use for the service." ] = "claude-3-7-sonnet-20250219" claude_api_key: Annotated[str, "The Claude API key to use for the service."] = None max_claude_tokens: Annotated[ @@ -47,16 +47,14 @@ def validate_response(self, response_text: str, schema: type[T]) -> T: try: # Try to parse as JSON first out_schema = schema.model_validate_json(response_text) - out_json = out_schema.model_dump() - return out_json - except Exception: - try: - # Re-parse with fixed escapes - escaped_str = response_text.replace("\\", "\\\\") - out_schema = schema.model_validate_json(escaped_str) - return out_schema.model_dump() - except Exception: - return + except ValidationError: + # Re-parse with fixed escapes + escaped_str = response_text.replace("\\", "\\\\") + # If we fail again, let the ValidationError be handled by the caller + out_schema = schema.model_validate_json(escaped_str) + + out_json = out_schema.model_dump() + return out_json def get_client(self): return anthropic.Anthropic( @@ -127,6 +125,18 @@ def __call__( f"Rate limit error: {e}. Retrying in {wait_time} seconds... (Attempt {tries}/{total_tries})", ) time.sleep(wait_time) + except ValidationError as e: + # The response was not valid JSON + if tries == total_tries: + # Last attempt failed. Give up + logger.error( + f"ValidationError: {e}. Max retries reached. Giving up. (Attempt {tries}/{total_tries})", + ) + break + else: + logger.warning( + f"ValidationError: {e}. (Attempt {tries}/{total_tries})", + ) except Exception as e: logger.error(f"Error during Claude API call: {e}") break diff --git a/marker/services/openai.py b/marker/services/openai.py index 4a5447424..27531f017 100644 --- a/marker/services/openai.py +++ b/marker/services/openai.py @@ -119,6 +119,18 @@ def __call__( f"Rate limit error: {e}. Retrying in {wait_time} seconds... (Attempt {tries}/{total_tries})", ) time.sleep(wait_time) + except json.JSONDecodeError as e: + # The response was not valid JSON + if tries == total_tries: + # Last attempt failed. Give up + logger.error( + f"JSONDecodeError: {e}. Max retries reached. Giving up. (Attempt {tries}/{total_tries})", + ) + break + else: + logger.warning( + f"JSONDecodeError: {e}. (Attempt {tries}/{total_tries})", + ) except Exception as e: logger.error(f"OpenAI inference failed: {e}") break