Skip to content

Commit 310968d

Browse files
committed
feat(pptx): implement error handling and logging in PPTX parser
- Added custom exceptions for PPTX parsing errors, including PptxExtractionError, PptxExtractorError, PptxParserError, PptxPresentationError, and PptxSlideProcessingError.
- Enhanced the PptxDocumentParser to raise appropriate exceptions during parsing failures and log detailed error messages.
- Improved logging throughout the extraction process to track successful and failed extractions, including shape processing and metadata extraction.
- Updated extractor classes to handle errors gracefully and provide informative logs for debugging.
1 parent c7fa386 commit 310968d

File tree

4 files changed

+367
-69
lines changed

4 files changed

+367
-69
lines changed
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,17 @@
1+
from .exceptions import (
2+
PptxExtractionError,
3+
PptxExtractorError,
4+
PptxParserError,
5+
PptxPresentationError,
6+
PptxSlideProcessingError,
7+
)
18
from .parser import PptxDocumentParser
29

310
# Public API of the package: the document parser followed by the exception
# types imported from the exceptions module.
__all__ = [
    "PptxDocumentParser",
    "PptxExtractionError",
    "PptxExtractorError",
    "PptxParserError",
    "PptxPresentationError",
    "PptxSlideProcessingError",
]
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
from ragbits.document_search.ingestion.parsers.exceptions import ParserError
2+
3+
4+
class PptxParserError(ParserError):
    """
    Common ancestor of every exception raised by the PPTX parser.

    The more specific PPTX errors in this module all derive from this class.
    """
8+
9+
10+
class PptxExtractionError(PptxParserError):
    """
    Raised when an extractor fails to extract content from a shape or slide.

    The constructor arguments are kept as attributes (``extractor_name``,
    ``slide_idx``, ``shape_info``, ``original_error``) so handlers can inspect
    them without parsing the message.
    """

    def __init__(self, extractor_name: str, slide_idx: int, shape_info: str, original_error: Exception) -> None:
        """
        Initialize the PptxExtractionError.

        Args:
            extractor_name: Name of the extractor that failed.
            slide_idx: Index of the slide where extraction failed.
            shape_info: Information about the shape that caused the failure.
            original_error: The original exception that caused the failure.
        """
        message = (
            f"Extractor '{extractor_name}' failed to extract content from slide {slide_idx}. "
            f"Shape info: {shape_info}. Original error: {original_error}"
        )
        super().__init__(message)
        self.extractor_name = extractor_name
        self.slide_idx = slide_idx
        self.shape_info = shape_info
        self.original_error = original_error

    def __reduce__(self) -> tuple:
        """
        Describe how to rebuild this exception when it is pickled or copied.

        Returns:
            A ``(callable, args, state)`` tuple that re-invokes the constructor
            with the original four arguments and restores instance attributes.
        """
        # The default exception reduction replays ``self.args`` into ``__init__``.
        # Only the formatted message is stored there, so rebuilding would fail
        # with a TypeError for the missing positional arguments.
        return (
            type(self),
            (self.extractor_name, self.slide_idx, self.shape_info, self.original_error),
            self.__dict__,
        )
34+
35+
36+
class PptxSlideProcessingError(PptxParserError):
    """
    Raised when processing of an entire slide fails.

    The constructor arguments are kept as attributes (``extractor_name``,
    ``slide_idx``, ``original_error``) so handlers can inspect them without
    parsing the message.
    """

    def __init__(self, extractor_name: str, slide_idx: int, original_error: Exception) -> None:
        """
        Initialize the PptxSlideProcessingError.

        Args:
            extractor_name: Name of the extractor that failed.
            slide_idx: Index of the slide that failed to process.
            original_error: The original exception that caused the failure.
        """
        message = f"Extractor '{extractor_name}' failed to process slide {slide_idx}. Original error: {original_error}"
        super().__init__(message)
        self.extractor_name = extractor_name
        self.slide_idx = slide_idx
        self.original_error = original_error

    def __reduce__(self) -> tuple:
        """
        Describe how to rebuild this exception when it is pickled or copied.

        Returns:
            A ``(callable, args, state)`` tuple that re-invokes the constructor
            with the original three arguments and restores instance attributes.
        """
        # The default exception reduction replays ``self.args`` into ``__init__``.
        # Only the formatted message is stored there, so rebuilding would fail
        # with a TypeError for the missing positional arguments.
        return (
            type(self),
            (self.extractor_name, self.slide_idx, self.original_error),
            self.__dict__,
        )
57+
58+
59+
class PptxPresentationError(PptxParserError):
    """
    Raised when the PPTX presentation cannot be loaded or processed.

    The constructor arguments are kept as attributes (``file_path``,
    ``original_error``) so handlers can inspect them without parsing the
    message.
    """

    def __init__(self, file_path: str, original_error: Exception) -> None:
        """
        Initialize the PptxPresentationError.

        Args:
            file_path: Path to the PPTX file that failed to load.
            original_error: The original exception that caused the failure.
        """
        message = f"Failed to load or process PPTX presentation from '{file_path}'. Original error: {original_error}"
        super().__init__(message)
        self.file_path = file_path
        self.original_error = original_error

    def __reduce__(self) -> tuple:
        """
        Describe how to rebuild this exception when it is pickled or copied.

        Returns:
            A ``(callable, args, state)`` tuple that re-invokes the constructor
            with the original two arguments and restores instance attributes.
        """
        # The default exception reduction replays ``self.args`` into ``__init__``.
        # Only the formatted message is stored there, so rebuilding would fail
        # with a TypeError for the missing positional argument.
        return (type(self), (self.file_path, self.original_error), self.__dict__)
76+
77+
78+
class PptxExtractorError(PptxParserError):
    """
    Raised when an extractor fails completely.

    The constructor arguments are kept as attributes (``extractor_name``,
    ``original_error``) so handlers can inspect them without parsing the
    message.
    """

    def __init__(self, extractor_name: str, original_error: Exception) -> None:
        """
        Initialize the PptxExtractorError.

        Args:
            extractor_name: Name of the extractor that failed.
            original_error: The original exception that caused the failure.
        """
        message = f"Extractor '{extractor_name}' failed completely. Original error: {original_error}"
        super().__init__(message)
        self.extractor_name = extractor_name
        self.original_error = original_error

    def __reduce__(self) -> tuple:
        """
        Describe how to rebuild this exception when it is pickled or copied.

        Returns:
            A ``(callable, args, state)`` tuple that re-invokes the constructor
            with the original two arguments and restores instance attributes.
        """
        # The default exception reduction replays ``self.args`` into ``__init__``.
        # Only the formatted message is stored there, so rebuilding would fail
        # with a TypeError for the missing positional argument.
        return (type(self), (self.extractor_name, self.original_error), self.__dict__)

0 commit comments

Comments
 (0)