25
25
class BasePptxExtractor (ABC ):
26
26
"""Base class for all PPTX content extractors."""
27
27
28
- def _get_slides (self , presentation : Presentation , slide : Slide | None = None ) -> list [tuple [int , Slide ]]:
28
+ @staticmethod
29
+ def _get_slides (presentation : Presentation , slide : Slide | None = None ) -> list [tuple [int , Slide ]]:
29
30
"""Get slides with their indices."""
30
31
slides = [slide ] if slide else list (presentation .slides )
31
32
return list (enumerate (slides , start = 1 ))
32
33
33
- def _get_shape_info (self , shape : BaseShape ) -> str :
34
+ @staticmethod
35
+ def _get_shape_info (shape : BaseShape ) -> str :
34
36
"""Get descriptive information about a shape for logging purposes."""
35
37
try :
36
38
shape_type = getattr (shape , "shape_type" , "unknown" )
@@ -40,8 +42,8 @@ def _get_shape_info(self, shape: BaseShape) -> str:
40
42
except Exception :
41
43
return "unknown_shape"
42
44
45
+ @staticmethod
43
46
def _create_text_element (
44
- self ,
45
47
element_type : str ,
46
48
document_meta : DocumentMeta ,
47
49
content : str ,
@@ -177,7 +179,8 @@ def get_extractor_name(self) -> str:
177
179
class PptxTextExtractor (BasePptxExtractor ):
178
180
"""Extracts text content from text frames."""
179
181
180
- def _extract_text_content (self , shape : BaseShape ) -> str | None :
182
+ @staticmethod
183
+ def _extract_text_content (shape : BaseShape ) -> str | None :
181
184
"""Extract text content from a shape."""
182
185
if not isinstance (shape , Shape ):
183
186
return None
@@ -201,15 +204,17 @@ def extract(
201
204
logger .error ("Text extraction failed: %s" , str (e ), exc_info = True )
202
205
raise PptxExtractorError (self .get_extractor_name (), e ) from e
203
206
204
- def get_extractor_name (self ) -> str :
207
+ @staticmethod
208
+ def get_extractor_name () -> str :
205
209
"""Get the name of this extractor."""
206
210
return "pptx_text_extractor"
207
211
208
212
209
213
class PptxHyperlinkExtractor (BasePptxExtractor ):
210
214
"""Extracts hyperlink addresses from shapes."""
211
215
212
- def _extract_hyperlink_content (self , shape : BaseShape ) -> str | None :
216
+ @staticmethod
217
+ def _extract_hyperlink_content (shape : BaseShape ) -> str | None :
213
218
"""Extract hyperlink content from a shape."""
214
219
if not hasattr (shape , "click_action" ) or isinstance (shape , GroupShape ):
215
220
return None
@@ -229,15 +234,17 @@ def extract(
229
234
element_type = "hyperlink" ,
230
235
)
231
236
232
- def get_extractor_name (self ) -> str :
237
+ @staticmethod
238
+ def get_extractor_name () -> str :
233
239
"""Get the name of this extractor."""
234
240
return "pptx_hyperlink_extractor"
235
241
236
242
237
243
class PptxImageExtractor (BasePptxExtractor ):
238
244
"""Extracts image information from shapes."""
239
245
240
- def _extract_image_content (self , shape : BaseShape ) -> str | None :
246
+ @staticmethod
247
+ def _extract_image_content (shape : BaseShape ) -> str | None :
241
248
"""Extract image content from a shape."""
242
249
if not isinstance (shape , Picture ):
243
250
return None
@@ -258,15 +265,17 @@ def extract(
258
265
element_type = "image" ,
259
266
)
260
267
261
- def get_extractor_name (self ) -> str :
268
+ @staticmethod
269
+ def get_extractor_name () -> str :
262
270
"""Get the name of this extractor."""
263
271
return "pptx_image_extractor"
264
272
265
273
266
274
class PptxShapeExtractor (BasePptxExtractor ):
267
275
"""Extracts shape information and metadata."""
268
276
269
- def _extract_shape_content (self , shape : BaseShape ) -> str | None :
277
+ @staticmethod
278
+ def _extract_shape_content (shape : BaseShape ) -> str | None :
270
279
"""Extract shape metadata from a shape."""
271
280
if not hasattr (shape , "shape_type" ):
272
281
return None
@@ -284,7 +293,8 @@ def extract(
284
293
element_type = "shape" ,
285
294
)
286
295
287
- def get_extractor_name (self ) -> str :
296
+ @staticmethod
297
+ def get_extractor_name () -> str :
288
298
"""Get the name of this extractor."""
289
299
return "pptx_shape_extractor"
290
300
@@ -327,7 +337,8 @@ def extract(
327
337
logger .error ("Metadata extraction failed: %s" , str (e ), exc_info = True )
328
338
raise PptxExtractorError (self .get_extractor_name (), e ) from e
329
339
330
- def get_extractor_name (self ) -> str :
340
+ @staticmethod
341
+ def get_extractor_name () -> str :
331
342
"""Get the name of this extractor."""
332
343
return "pptx_metadata_extractor"
333
344
@@ -389,7 +400,8 @@ def extract(
389
400
logger .error ("Speaker notes extraction failed: %s" , str (e ), exc_info = True )
390
401
raise PptxExtractorError (self .get_extractor_name (), e ) from e
391
402
392
- def get_extractor_name (self ) -> str :
403
+ @staticmethod
404
+ def get_extractor_name () -> str :
393
405
"""Get the name of this extractor."""
394
406
return "pptx_speaker_notes_extractor"
395
407
0 commit comments