Skip to content

Commit 7d69afd

Browse files
committed
adding todo note
1 parent e3e9279 commit 7d69afd

File tree

1 file changed

+1
-0
lines changed

1 file changed

+1
-0
lines changed

bbot/modules/internal/excavate.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1245,6 +1245,7 @@ async def handle_event(self, event, **kwargs):
1245 1245
# skip PDF responses -- running YARA/regex on raw PDF bytes produces false positives and wastes time.
1246 1246
# PDFs are still processed correctly via the filedownload → extractous → RAW_TEXT pipeline,
1247 1247
# which extracts readable text and feeds it back to excavate as a RAW_TEXT event (handled separately below).
1248 +
# TODO: remove this in favor of a proper categorization system for text vs non-text (i.e. to-be-extracted) content
1248 1249
if content_type and "application/pdf" in content_type.lower():
1249 1250
self.debug(f"Skipping PDF response: {event.data.get('url', 'unknown')}")
1250 1251
return

0 commit comments

Comments
 (0)