Skip to content

Commit 6cc1062

Browse files
committed
refactor(pptx): rename ExtractedNotes to ExtractedSpeakerNotes and update related references
- Renamed ExtractedNotes to ExtractedSpeakerNotes for clarity.
- Adjusted method signatures and import statements accordingly in the parser and extractor files.
- Exported DEFAULT_EXTRACTORS from the extractors package and updated its entry to use the new SpeakerNotesExtractor.
1 parent d3e21c0 commit 6cc1062

File tree

3 files changed

+7
-7
lines changed

3 files changed

+7
-7
lines changed

packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/pptx/extractors/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
ExtractedText,
99
)
1010
from .extractors import (
11+
DEFAULT_EXTRACTORS,
1112
BaseExtractor,
1213
HyperlinkExtractor,
1314
ImageExtractor,
@@ -18,6 +19,7 @@
1819
)
1920

2021
__all__ = [
22+
"DEFAULT_EXTRACTORS",
2123
"BaseExtractor",
2224
"ExtractedHyperlink",
2325
"ExtractedImage",

packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/pptx/extractors/extractors.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@
22

33
import io
44
from abc import ABC, abstractmethod
5-
from dataclasses import dataclass, field
6-
from datetime import datetime
75
from typing import Any
86

97
from PIL import Image
@@ -341,7 +339,7 @@ def get_extractor_name(self) -> str:
341339
class SpeakerNotesExtractor(BaseExtractor):
342340
"""Extracts speaker notes from slides."""
343341

344-
def extract(self, presentation: Presentation, slide: Slide | None = None) -> list[ExtractedNotes]:
342+
def extract(self, presentation: Presentation, slide: Slide | None = None) -> list[ExtractedSpeakerNotes]:
345343
"""Extract notes from all slides or a specific slide."""
346344
extracted_notes = []
347345

@@ -365,7 +363,7 @@ def extract(self, presentation: Presentation, slide: Slide | None = None) -> lis
365363
if notes_text.strip():
366364
formatting = self._extract_notes_formatting(notes_slide.notes_text_frame)
367365
extracted_notes.append(
368-
ExtractedNotes(content=notes_text, slide_index=slide_idx, formatting=formatting)
366+
ExtractedSpeakerNotes(content=notes_text, slide_index=slide_idx, formatting=formatting)
369367
)
370368

371369
return extracted_notes
@@ -452,7 +450,7 @@ def get_extractor_name(self) -> str:
452450
HyperlinkExtractor(),
453451
ImageExtractor(),
454452
ShapeExtractor(),
455-
NotesExtractor(),
453+
SpeakerNotesExtractor(),
456454
MetadataExtractor(),
457455
SlideImageExtractor(),
458456
]

packages/ragbits-document-search/src/ragbits/document_search/ingestion/parsers/pptx/parser.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
ExtractedHyperlink,
1515
ExtractedImage,
1616
ExtractedMetadata,
17-
ExtractedNotes,
17+
ExtractedSpeakerNotes,
1818
ExtractedText,
1919
)
2020

@@ -349,7 +349,7 @@ def _process_image_content(self, image_data: list[ExtractedImage], document: Doc
349349

350350
return elements
351351

352-
def _process_notes_content(self, notes_data: list[ExtractedNotes], document: Document) -> list[TextElement]:
352+
def _process_notes_content(self, notes_data: list[ExtractedSpeakerNotes], document: Document) -> list[TextElement]:
353353
"""
354354
Process extracted notes content into TextElement objects.
355355

0 commit comments

Comments
 (0)