Skip to content

Commit 7a43dfb

Browse files
paultranvan and claude committed
refactor(phase-3): replace RuntimeError with typed OpenRAGError subclasses
Replace all 12 RuntimeError raises introduced by phase-3 with proper OpenRAGError subclasses that flow through the global handler:

- indexer.py: RuntimeError → UnexpectedError; VDBError/EmbeddingError catches widened to OpenRAGError
- llm.py: RuntimeError → UnexpectedError for streaming failures
- reranker.py: RuntimeError → UnexpectedError
- serializer.py: RuntimeError → FileStorageError (I/O) / UnexpectedError
- marker.py: RuntimeError → FileStorageError (I/O) / UnexpectedError
- ray_utils.py: RuntimeError → RayActorError (wraps RayTaskError)
- Fix pre-existing lint issues (unused vars, unsorted imports)

The replaced raises that sit inside an `except ... as e` handler use `from e` chaining for proper tracebacks.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 83574f0 commit 7a43dfb

File tree

11 files changed

+32
-61
lines changed

11 files changed

+32
-61
lines changed

openrag/components/indexer/chunker/chunker.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ async def _generate_context(
7979
logger.error("VLM context generation failed", error=str(e))
8080
return ""
8181

82-
except Exception as e:
82+
except Exception:
8383
# Unexpected errors - log but still gracefully degrade
8484
logger.exception("Unexpected error during context generation")
8585
return ""
@@ -132,7 +132,7 @@ async def contextualize_chunks(
132132
for chunk, context in zip(chunks, contexts, strict=True)
133133
]
134134

135-
except Exception as e:
135+
except Exception:
136136
logger.exception("Error contextualizing chunks", filename=filename)
137137
return chunks
138138

openrag/components/indexer/embeddings/openai.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def embedding_dimension(self) -> int:
3232
logger.error("Invalid embedding response format", error=str(e))
3333
raise EmbeddingResponseError("Unexpected response format", error=str(e))
3434

35-
except Exception as e:
35+
except Exception:
3636
logger.exception("Unexpected error getting embedding dimension")
3737
raise UnexpectedEmbeddingError("An unexpected error occurred")
3838

openrag/components/indexer/indexer.py

Lines changed: 11 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010
import torch
1111
from config import load_config
1212
from langchain_core.documents.base import Document
13-
from utils.exceptions.embeddings import EmbeddingError
14-
from utils.exceptions.vectordb import VDBError
13+
from utils.exceptions.base import OpenRAGError
14+
from utils.exceptions.common import UnexpectedError
1515

1616
from .chunker import BaseChunker, ChunkerFactory
1717
from .utils import serialize_file
@@ -121,25 +121,8 @@ async def add_file(
121121
# Mark task as completed
122122
await task_state_manager.set_state.remote(task_id, "COMPLETED")
123123

124-
except OSError as e:
125-
# File I/O errors (FileNotFoundError, PermissionError, etc.)
126-
log.error("File operation failed", path=path, error=str(e))
127-
tb = "".join(traceback.format_exception(type(e), e, e.__traceback__))
128-
await task_state_manager.set_state.remote(task_id, "FAILED")
129-
await task_state_manager.set_error.remote(task_id, tb)
130-
raise
131-
132-
except VDBError as e:
133-
# Database errors (already typed from vectordb)
134-
log.error("Database operation failed", error=str(e))
135-
tb = "".join(traceback.format_exception(type(e), e, e.__traceback__))
136-
await task_state_manager.set_state.remote(task_id, "FAILED")
137-
await task_state_manager.set_error.remote(task_id, tb)
138-
raise
139-
140-
except EmbeddingError as e:
141-
# Embedding errors (already typed from embeddings)
142-
log.error("Embedding generation failed", error=str(e))
124+
except OpenRAGError as e:
125+
log.error("Operation failed during file ingestion", code=e.code, error=e.message)
143126
tb = "".join(traceback.format_exception(type(e), e, e.__traceback__))
144127
await task_state_manager.set_state.remote(task_id, "FAILED")
145128
await task_state_manager.set_error.remote(task_id, tb)
@@ -151,7 +134,7 @@ async def add_file(
151134
tb = "".join(traceback.format_exception(type(e), e, e.__traceback__))
152135
await task_state_manager.set_state.remote(task_id, "FAILED")
153136
await task_state_manager.set_error.remote(task_id, tb)
154-
raise RuntimeError("An unexpected error occurred during file processing")
137+
raise UnexpectedError("An unexpected error occurred during file processing") from e
155138

156139
finally:
157140
# GPU cleanup
@@ -185,15 +168,12 @@ async def delete_file(self, file_id: str, partition: str) -> bool:
185168
await vectordb.delete_file.remote(file_id, partition)
186169
log.info("Deleted file from partition.", file_id=file_id, partition=partition)
187170

188-
except VDBError as e:
189-
# Database errors (already typed from vectordb)
190-
log.error("Database operation failed in delete_file", error=str(e))
171+
except OpenRAGError:
191172
raise
192173

193174
except Exception as e:
194-
# Unexpected errors
195175
log.exception("Unexpected error in delete_file")
196-
raise RuntimeError("An unexpected error occurred during file deletion")
176+
raise UnexpectedError("An unexpected error occurred during file deletion") from e
197177

198178
@ray.method(concurrency_group="update")
199179
async def update_file_metadata(
@@ -219,15 +199,12 @@ async def update_file_metadata(
219199

220200
log.info("Metadata updated for file.")
221201

222-
except VDBError as e:
223-
# Database errors (already typed from vectordb)
224-
log.error("Database operation failed in update_file_metadata", error=str(e))
202+
except OpenRAGError:
225203
raise
226204

227205
except Exception as e:
228-
# Unexpected errors
229206
log.exception("Unexpected error in update_file_metadata")
230-
raise RuntimeError("An unexpected error occurred during metadata update")
207+
raise UnexpectedError("An unexpected error occurred during metadata update") from e
231208

232209
@ray.method(concurrency_group="update")
233210
async def copy_file(
@@ -258,15 +235,12 @@ async def copy_file(
258235
new_partition=metadata.get("partition"),
259236
)
260237

261-
except VDBError as e:
262-
# Database errors (already typed from vectordb)
263-
log.error("Database operation failed in copy_file", error=str(e))
238+
except OpenRAGError:
264239
raise
265240

266241
except Exception as e:
267-
# Unexpected errors
268242
log.exception("Unexpected error in copy_file")
269-
raise RuntimeError("An unexpected error occurred during file copy")
243+
raise UnexpectedError("An unexpected error occurred during file copy") from e
270244

271245
@ray.method(concurrency_group="search")
272246
async def asearch(

openrag/components/indexer/loaders/base.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import asyncio
22
import base64
3-
import binascii
43
import re
54
from abc import ABC, abstractmethod
65
from io import BytesIO

openrag/components/indexer/loaders/media_loader.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import numpy as np
88
from components.utils import get_audio_semaphore
99
from langchain_core.documents.base import Document
10-
from openai import AsyncOpenAI, APIError
10+
from openai import APIError, AsyncOpenAI
1111
from pydub import AudioSegment, silence
1212
from tqdm.asyncio import tqdm
1313
from utils.logger import get_logger

openrag/components/indexer/loaders/pdf_loaders/marker.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from config import load_config
1010
from langchain_core.documents.base import Document
1111
from marker.converters.pdf import PdfConverter
12+
from utils.exceptions.common import FileStorageError, UnexpectedError
1213
from utils.logger import get_logger
1314

1415
from ..base import BaseLoader
@@ -107,13 +108,11 @@ def _process_pdf(file_path, config):
107108
logger.info("PDF processing cancelled", path=file_path)
108109
raise
109110
except OSError as e:
110-
# File I/O error
111111
logger.error("Cannot read PDF file", path=file_path, error=str(e))
112-
raise RuntimeError(f"Cannot read PDF file: {e}")
112+
raise FileStorageError(f"Cannot read PDF file: {e}") from e
113113
except Exception as e:
114-
# Marker library errors or unexpected failures
115114
logger.exception("Error processing PDF", path=file_path, error=str(e))
116-
raise RuntimeError("Failed to process PDF document")
115+
raise UnexpectedError("Failed to process PDF document") from e
117116
finally:
118117
gc.collect()
119118
if torch.cuda.is_available():
@@ -133,7 +132,7 @@ def run_with_timeout():
133132
return result
134133
except MPTimeoutError:
135134
self.logger.exception("MarkerWorker child process timed out", path=file_path)
136-
raise RuntimeError(f"PDF processing timed out")
135+
raise UnexpectedError("PDF processing timed out")
137136
except asyncio.CancelledError:
138137
# Cancellation - propagate
139138
self.logger.info("PDF processing cancelled", path=file_path)
@@ -242,7 +241,7 @@ async def aload_document(
242241
)
243242

244243
if not markdown:
245-
raise RuntimeError(f"Conversion failed for {file_path_str}")
244+
raise UnexpectedError(f"Conversion failed for {file_path_str}")
246245

247246
if self.image_captioning:
248247
keys = list(images.keys())
@@ -271,9 +270,8 @@ async def aload_document(
271270
logger.info("PDF loading cancelled", path=file_path_str)
272271
raise
273272
except OSError as e:
274-
# File I/O error
275273
logger.error("Cannot read PDF file", path=file_path_str, error=str(e))
276-
raise RuntimeError(f"Cannot read PDF file: {e}")
274+
raise FileStorageError(f"Cannot read PDF file: {e}") from e
277275
except Exception:
278276
# Ray actor errors or PDF processing failures
279277
logger.exception("Error in aload_document", path=file_path_str)

openrag/components/indexer/loaders/serializer.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import torch
66
from config import load_config
77
from langchain_core.documents.base import Document
8+
from utils.exceptions.common import FileStorageError, UnexpectedError
89

910
from . import get_loader_classes
1011

@@ -85,10 +86,8 @@ async def serialize_document(
8586
log.info("Document serialized successfully")
8687
return doc
8788
except OSError as e:
88-
# File operation failed (file not found, permission denied, etc.)
8989
log.error("File operation failed during serialization", path=str(path), error=str(e))
90-
raise RuntimeError(f"Cannot read file: {e}")
90+
raise FileStorageError(f"Cannot read file: {e}") from e
9191
except Exception as e:
92-
# Loader-specific errors or unexpected failures
9392
log.exception("Failed to serialize document", path=str(path), file_type=file_ext, error=str(e))
94-
raise RuntimeError("Failed to serialize document: unsupported format or corrupted file")
93+
raise UnexpectedError("Failed to serialize document: unsupported format or corrupted file") from e

openrag/components/indexer/vectordb/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ def __init__(self, database_url: str, logger=logger):
152152
AUTH_TOKEN = os.getenv("AUTH_TOKEN")
153153
self._ensure_admin_user(AUTH_TOKEN)
154154

155-
except Exception as e:
155+
except Exception:
156156
raise VDBConnectionError(
157157
"An unexpected database error occurred",
158158
db_url=str(database_url),

openrag/components/llm.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import json
44

55
import httpx
6+
from utils.exceptions.common import UnexpectedError
67
from utils.logger import get_logger
78

89
logger = get_logger()
@@ -79,9 +80,8 @@ async def chat_completion(self, request: dict):
7980
raise
8081

8182
except Exception as e:
82-
# Truly unexpected errors
8383
logger.exception("Unexpected error during LLM streaming")
84-
raise RuntimeError("An unexpected error occurred during streaming")
84+
raise UnexpectedError("An unexpected error occurred during streaming") from e
8585

8686
else: # Handle non-streaming response
8787
try:

openrag/components/ray_utils.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
import ray
55
from ray.exceptions import RayTaskError, TaskCancelledError
6+
from utils.exceptions.common import RayActorError
67
from utils.logger import get_logger
78

89
logger = get_logger()
@@ -33,7 +34,7 @@ async def call_ray_actor_with_timeout(
3334
TimeoutError: If the task exceeds the timeout
3435
asyncio.CancelledError: If the calling coroutine is cancelled
3536
TaskCancelledError: If the Ray task was cancelled
36-
RuntimeError: If the Ray task failed with an error
37+
RayActorError: If the Ray task failed with an error
3738
"""
3839
try:
3940
result = await asyncio.wait_for(asyncio.gather(future), timeout=timeout)
@@ -54,4 +55,4 @@ async def call_ray_actor_with_timeout(
5455
raise
5556

5657
except RayTaskError as e:
57-
raise RuntimeError(f"{task_description} failed") from e
58+
raise RayActorError(f"{task_description} failed") from e

0 commit comments

Comments
 (0)