Skip to content

Commit 6e1005b

Browse files
paultranvan and claude committed
refactor(phase-3): replace RuntimeError with typed OpenRAGError subclasses
Replace all 12 RuntimeError raises introduced by phase-3 with proper OpenRAGError subclasses that flow through the global handler:

- indexer.py: RuntimeError → UnexpectedError, VDBError/EmbeddingError catches widened to OpenRAGError
- llm.py: RuntimeError → UnexpectedError for streaming failures
- reranker.py: RuntimeError → UnexpectedError
- serializer.py: RuntimeError → FileStorageError (I/O) / UnexpectedError
- marker.py: RuntimeError → FileStorageError (I/O) / UnexpectedError
- ray_utils.py: RuntimeError → RayActorError (wraps RayTaskError)
- Fix pre-existing lint issues (unused vars, unsorted imports)

All exception raises now use `from e` chaining for proper tracebacks.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 05ec9fa commit 6e1005b

File tree

12 files changed

+47
-72
lines changed

12 files changed

+47
-72
lines changed

openrag/components/indexer/chunker/chunker.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import openai
44
from components.indexer.utils.text_sanitizer import sanitize_text
5+
from utils.exceptions.base import OpenRAGError
56
from components.prompts import CHUNK_CONTEXTUALIZER_PROMPT
67
from components.utils import detect_language, get_vlm_semaphore, load_config
78
from langchain_core.documents.base import Document
@@ -79,7 +80,7 @@ async def _generate_context(
7980
logger.error("VLM context generation failed", error=str(e))
8081
return ""
8182

82-
except Exception as e:
83+
except Exception:
8384
# Unexpected errors - log but still gracefully degrade
8485
logger.exception("Unexpected error during context generation")
8586
return ""
@@ -132,7 +133,9 @@ async def contextualize_chunks(
132133
for chunk, context in zip(chunks, contexts, strict=True)
133134
]
134135

135-
except Exception as e:
136+
except OpenRAGError:
137+
raise
138+
except Exception:
136139
logger.exception("Error contextualizing chunks", filename=filename)
137140
return chunks
138141

openrag/components/indexer/embeddings/openai.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def embedding_dimension(self) -> int:
3232
logger.error("Invalid embedding response format", error=str(e))
3333
raise EmbeddingResponseError("Unexpected response format", error=str(e))
3434

35-
except Exception as e:
35+
except Exception:
3636
logger.exception("Unexpected error getting embedding dimension")
3737
raise UnexpectedEmbeddingError("An unexpected error occurred")
3838

openrag/components/indexer/indexer.py

Lines changed: 11 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010
import torch
1111
from config import load_config
1212
from langchain_core.documents.base import Document
13-
from utils.exceptions.embeddings import EmbeddingError
14-
from utils.exceptions.vectordb import VDBError
13+
from utils.exceptions.base import OpenRAGError
14+
from utils.exceptions.common import UnexpectedError
1515

1616
from .chunker import BaseChunker, ChunkerFactory
1717
from .utils import serialize_file
@@ -121,25 +121,8 @@ async def add_file(
121121
# Mark task as completed
122122
await task_state_manager.set_state.remote(task_id, "COMPLETED")
123123

124-
except OSError as e:
125-
# File I/O errors (FileNotFoundError, PermissionError, etc.)
126-
log.error("File operation failed", path=path, error=str(e))
127-
tb = "".join(traceback.format_exception(type(e), e, e.__traceback__))
128-
await task_state_manager.set_state.remote(task_id, "FAILED")
129-
await task_state_manager.set_error.remote(task_id, tb)
130-
raise
131-
132-
except VDBError as e:
133-
# Database errors (already typed from vectordb)
134-
log.error("Database operation failed", error=str(e))
135-
tb = "".join(traceback.format_exception(type(e), e, e.__traceback__))
136-
await task_state_manager.set_state.remote(task_id, "FAILED")
137-
await task_state_manager.set_error.remote(task_id, tb)
138-
raise
139-
140-
except EmbeddingError as e:
141-
# Embedding errors (already typed from embeddings)
142-
log.error("Embedding generation failed", error=str(e))
124+
except OpenRAGError as e:
125+
log.error("Operation failed during file ingestion", code=e.code, error=e.message)
143126
tb = "".join(traceback.format_exception(type(e), e, e.__traceback__))
144127
await task_state_manager.set_state.remote(task_id, "FAILED")
145128
await task_state_manager.set_error.remote(task_id, tb)
@@ -151,7 +134,7 @@ async def add_file(
151134
tb = "".join(traceback.format_exception(type(e), e, e.__traceback__))
152135
await task_state_manager.set_state.remote(task_id, "FAILED")
153136
await task_state_manager.set_error.remote(task_id, tb)
154-
raise RuntimeError("An unexpected error occurred during file processing")
137+
raise UnexpectedError("An unexpected error occurred during file processing") from e
155138

156139
finally:
157140
# GPU cleanup
@@ -185,15 +168,12 @@ async def delete_file(self, file_id: str, partition: str) -> bool:
185168
await vectordb.delete_file.remote(file_id, partition)
186169
log.info("Deleted file from partition.", file_id=file_id, partition=partition)
187170

188-
except VDBError as e:
189-
# Database errors (already typed from vectordb)
190-
log.error("Database operation failed in delete_file", error=str(e))
171+
except OpenRAGError:
191172
raise
192173

193174
except Exception as e:
194-
# Unexpected errors
195175
log.exception("Unexpected error in delete_file")
196-
raise RuntimeError("An unexpected error occurred during file deletion")
176+
raise UnexpectedError("An unexpected error occurred during file deletion") from e
197177

198178
@ray.method(concurrency_group="update")
199179
async def update_file_metadata(
@@ -219,15 +199,12 @@ async def update_file_metadata(
219199

220200
log.info("Metadata updated for file.")
221201

222-
except VDBError as e:
223-
# Database errors (already typed from vectordb)
224-
log.error("Database operation failed in update_file_metadata", error=str(e))
202+
except OpenRAGError:
225203
raise
226204

227205
except Exception as e:
228-
# Unexpected errors
229206
log.exception("Unexpected error in update_file_metadata")
230-
raise RuntimeError("An unexpected error occurred during metadata update")
207+
raise UnexpectedError("An unexpected error occurred during metadata update") from e
231208

232209
@ray.method(concurrency_group="update")
233210
async def copy_file(
@@ -258,15 +235,12 @@ async def copy_file(
258235
new_partition=metadata.get("partition"),
259236
)
260237

261-
except VDBError as e:
262-
# Database errors (already typed from vectordb)
263-
log.error("Database operation failed in copy_file", error=str(e))
238+
except OpenRAGError:
264239
raise
265240

266241
except Exception as e:
267-
# Unexpected errors
268242
log.exception("Unexpected error in copy_file")
269-
raise RuntimeError("An unexpected error occurred during file copy")
243+
raise UnexpectedError("An unexpected error occurred during file copy") from e
270244

271245
@ray.method(concurrency_group="search")
272246
async def asearch(

openrag/components/indexer/loaders/base.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import asyncio
22
import base64
3-
import binascii
43
import re
54
from abc import ABC, abstractmethod
65
from io import BytesIO

openrag/components/indexer/loaders/eml_loader.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
from langchain_core.documents.base import Document
1111
from PIL import Image, UnidentifiedImageError
12+
from utils.exceptions.common import FileStorageError, UnexpectedError
1213

1314
from . import get_loader_classes
1415
from .base import BaseLoader
@@ -327,13 +328,13 @@ async def aload_document(self, file_path, metadata: dict | None = None, save_mar
327328
metadata["markdown_path"] = str(markdown_path)
328329
except OSError as e:
329330
# File I/O error reading email file
330-
raise ValueError(f"Cannot read email file: {e}")
331+
raise FileStorageError(f"Cannot read email file: {e}") from e
331332
except email.errors.MessageError as e:
332333
# Email parsing error
333-
raise ValueError(f"Invalid email format: {e}")
334+
raise UnexpectedError(f"Invalid email format: {e}") from e
334335
except Exception as e:
335336
# Unexpected error
336-
raise ValueError(f"Failed to parse the EML file {file_path}: {e}")
337+
raise UnexpectedError(f"Failed to parse the EML file {file_path}: {e}") from e
337338

338339
document = Document(page_content=content_body, metadata=metadata)
339340
return document

openrag/components/indexer/loaders/media_loader.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import numpy as np
88
from components.utils import get_audio_semaphore
99
from langchain_core.documents.base import Document
10-
from openai import AsyncOpenAI, APIError
10+
from openai import APIError, AsyncOpenAI
1111
from pydub import AudioSegment, silence
1212
from tqdm.asyncio import tqdm
1313
from utils.logger import get_logger

openrag/components/indexer/loaders/pdf_loaders/marker.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from config import load_config
1010
from langchain_core.documents.base import Document
1111
from marker.converters.pdf import PdfConverter
12+
from utils.exceptions.common import FileStorageError, UnexpectedError
1213
from utils.logger import get_logger
1314

1415
from ..base import BaseLoader
@@ -107,13 +108,11 @@ def _process_pdf(file_path, config):
107108
logger.info("PDF processing cancelled", path=file_path)
108109
raise
109110
except OSError as e:
110-
# File I/O error
111111
logger.error("Cannot read PDF file", path=file_path, error=str(e))
112-
raise RuntimeError(f"Cannot read PDF file: {e}")
112+
raise FileStorageError(f"Cannot read PDF file: {e}") from e
113113
except Exception as e:
114-
# Marker library errors or unexpected failures
115114
logger.exception("Error processing PDF", path=file_path, error=str(e))
116-
raise RuntimeError("Failed to process PDF document")
115+
raise UnexpectedError("Failed to process PDF document") from e
117116
finally:
118117
gc.collect()
119118
if torch.cuda.is_available():
@@ -133,7 +132,7 @@ def run_with_timeout():
133132
return result
134133
except MPTimeoutError:
135134
self.logger.exception("MarkerWorker child process timed out", path=file_path)
136-
raise RuntimeError(f"PDF processing timed out")
135+
raise UnexpectedError("PDF processing timed out")
137136
except asyncio.CancelledError:
138137
# Cancellation - propagate
139138
self.logger.info("PDF processing cancelled", path=file_path)
@@ -242,7 +241,7 @@ async def aload_document(
242241
)
243242

244243
if not markdown:
245-
raise RuntimeError(f"Conversion failed for {file_path_str}")
244+
raise UnexpectedError(f"Conversion failed for {file_path_str}")
246245

247246
if self.image_captioning:
248247
keys = list(images.keys())
@@ -271,9 +270,8 @@ async def aload_document(
271270
logger.info("PDF loading cancelled", path=file_path_str)
272271
raise
273272
except OSError as e:
274-
# File I/O error
275273
logger.error("Cannot read PDF file", path=file_path_str, error=str(e))
276-
raise RuntimeError(f"Cannot read PDF file: {e}")
274+
raise FileStorageError(f"Cannot read PDF file: {e}") from e
277275
except Exception:
278276
# Ray actor errors or PDF processing failures
279277
logger.exception("Error in aload_document", path=file_path_str)

openrag/components/indexer/loaders/serializer.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import torch
66
from config import load_config
77
from langchain_core.documents.base import Document
8+
from utils.exceptions.common import FileStorageError, UnexpectedError
89

910
from . import get_loader_classes
1011

@@ -85,10 +86,8 @@ async def serialize_document(
8586
log.info("Document serialized successfully")
8687
return doc
8788
except OSError as e:
88-
# File operation failed (file not found, permission denied, etc.)
8989
log.error("File operation failed during serialization", path=str(path), error=str(e))
90-
raise RuntimeError(f"Cannot read file: {e}")
90+
raise FileStorageError(f"Cannot read file: {e}") from e
9191
except Exception as e:
92-
# Loader-specific errors or unexpected failures
9392
log.exception("Failed to serialize document", path=str(path), file_type=file_ext, error=str(e))
94-
raise RuntimeError("Failed to serialize document: unsupported format or corrupted file")
93+
raise UnexpectedError("Failed to serialize document: unsupported format or corrupted file") from e

openrag/components/indexer/vectordb/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ def __init__(self, database_url: str, logger=logger):
152152
AUTH_TOKEN = os.getenv("AUTH_TOKEN")
153153
self._ensure_admin_user(AUTH_TOKEN)
154154

155-
except Exception as e:
155+
except Exception:
156156
raise VDBConnectionError(
157157
"An unexpected database error occurred",
158158
db_url=str(database_url),

openrag/components/llm.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import json
44

55
import httpx
6+
from utils.exceptions.common import UnexpectedError
67
from utils.logger import get_logger
78

89
logger = get_logger()
@@ -38,10 +39,10 @@ async def completions(self, request: dict):
3839
data = response.json()
3940
yield data
4041
except httpx.HTTPStatusError as e:
41-
error_detail = e.response.text
42-
raise ValueError(f"LLM API error ({e.response.status_code}): {error_detail}")
42+
logger.error("LLM API returned error", status_code=e.response.status_code)
43+
raise UnexpectedError(f"LLM API error ({e.response.status_code})") from e
4344
except json.JSONDecodeError as e:
44-
raise ValueError(f"Invalid JSON in API response: {str(e)}")
45+
raise UnexpectedError("Invalid JSON in LLM API response") from e
4546

4647
async def chat_completion(self, request: dict):
4748
request.pop("model")
@@ -71,17 +72,16 @@ async def chat_completion(self, request: dict):
7172
except httpx.HTTPStatusError as e:
7273
# 4xx/5xx responses
7374
logger.error("LLM API returned error", status_code=e.response.status_code)
74-
raise
75+
raise UnexpectedError(f"LLM API error ({e.response.status_code})") from e
7576

7677
except httpx.RequestError as e:
7778
# Network/connection failures
7879
logger.error("Network error during LLM streaming", error=str(e))
79-
raise
80+
raise UnexpectedError("Network error during LLM streaming") from e
8081

8182
except Exception as e:
82-
# Truly unexpected errors
8383
logger.exception("Unexpected error during LLM streaming")
84-
raise RuntimeError("An unexpected error occurred during streaming")
84+
raise UnexpectedError("An unexpected error occurred during streaming") from e
8585

8686
else: # Handle non-streaming response
8787
try:
@@ -94,7 +94,7 @@ async def chat_completion(self, request: dict):
9494
data = response.json()
9595
yield data
9696
except httpx.HTTPStatusError as e:
97-
error_detail = e.response.text
98-
raise ValueError(f"LLM API error ({e.response.status_code}): {error_detail}")
97+
logger.error("LLM API returned error", status_code=e.response.status_code)
98+
raise UnexpectedError(f"LLM API error ({e.response.status_code})") from e
9999
except json.JSONDecodeError as e:
100-
raise ValueError(f"Invalid JSON in API response: {str(e)}")
100+
raise UnexpectedError("Invalid JSON in LLM API response") from e

0 commit comments

Comments
 (0)