|
3 | 3 | import io |
4 | 4 | import os |
5 | 5 | import pickle |
| 6 | +import re |
6 | 7 | import sys |
7 | 8 | from typing import Union |
8 | 9 | from unittest import TestCase |
|
14 | 15 | from picklescan.scanner import ( |
15 | 16 | Global, |
16 | 17 | SafetyLevel, |
| 18 | + ScanFilter, |
17 | 19 | ScanResult, |
18 | 20 | _http_get, |
19 | 21 | _list_globals, |
@@ -586,3 +588,95 @@ def test_not_a_pickle_file(): |
586 | 588 | # File is not a valid pickle, but scanner should not error - just report no threats |
587 | 589 | not_a_pickle = ScanResult([], scanned_files=1, issues_count=0, infected_files=0, scan_err=False) |
588 | 590 | compare_scan_results(scan_file_path(f"{_root_path}/data/not_a_pickle.bin"), not_a_pickle) |
| 591 | + |
| 592 | + |
| 593 | +# --------------------------------------------------------------------------- |
| 594 | +# Tests for scan_directory_path with ScanFilter (--include/--exclude and --include-dir/--exclude-dir support)
| 595 | +# --------------------------------------------------------------------------- |
| 596 | + |
| 597 | + |
def test_scan_directory_exclude_file():
    """Excluding ``.zip`` paths must shrink, but not empty, the scan of data/.

    The filtered scan is compared against an unfiltered scan of the same
    directory: it must still scan something, and strictly fewer files.
    """
    data_dir = f"{_root_path}/data/"
    zip_pattern = re.compile(r"\.zip$")

    # Scan with every path ending in ".zip" excluded.
    filtered = scan_directory_path(data_dir, scan_filter=ScanFilter(exclude=[zip_pattern]))
    assert filtered.scanned_files > 0

    # Baseline without any filter; the exclusion must have dropped some files.
    baseline = scan_directory_path(data_dir)
    assert filtered.scanned_files < baseline.scanned_files
| 609 | + |
| 610 | + |
def test_scan_directory_include_file():
    """An include pattern limits the scan to files whose path matches it."""
    # Pattern targets a single file name: benign0_v3.pkl
    only_benign_v3 = ScanFilter(include=[re.compile(r"benign0_v3\.pkl$")])
    result = scan_directory_path(f"{_root_path}/data/", scan_filter=only_benign_v3)

    # Exactly one file scanned, and it reports no issues.
    assert result.scanned_files == 1
    assert result.issues_count == 0
| 618 | + |
| 619 | + |
def test_scan_directory_exclude_wins_over_include():
    """A path matching both an include and an exclude pattern is not scanned.

    Mirrors ClamAV semantics: excludes take precedence over includes.
    """
    include_patterns = [re.compile(r"benign0_v3\.pkl$")]
    # Broader pattern that also matches the included file.
    exclude_patterns = [re.compile(r"benign")]
    conflicting = ScanFilter(include=include_patterns, exclude=exclude_patterns)

    result = scan_directory_path(f"{_root_path}/data/", scan_filter=conflicting)
    assert result.scanned_files == 0
| 628 | + |
| 629 | + |
def test_scan_directory_exclude_dir():
    """--exclude-dir prevents traversal into matching directories.

    Scans the tests root twice: once excluding directories matching ``data2``
    and once including only directories whose path ends in ``data``. Both
    scans are expected to report the same number of scanned files.

    NOTE(review): the equality check assumes data/ and data2/ are the only
    sub-directories under the tests root that contribute scanned files --
    confirm against the fixture layout.
    """
    tests_root = f"{_root_path}/"

    # Scanning the parent tests/ directory but excluding 'data2'
    sf = ScanFilter(exclude_dir=[re.compile(r"data2")])
    sr = scan_directory_path(tests_root, scan_filter=sf)
    # Should still find files in data/ but none from data2/
    assert sr.scanned_files > 0

    # Compare with an include_dir that only allows data/.
    # The character class accepts either path separator, so the pattern also
    # matches a native Windows path ending in "\data" instead of silently
    # matching nothing there.
    sf2 = ScanFilter(include_dir=[re.compile(r"[\\/]data$")])
    sr2 = scan_directory_path(tests_root, scan_filter=sf2)
    assert sr2.scanned_files > 0

    # Both should give the same number of scanned files (only data/)
    assert sr.scanned_files == sr2.scanned_files
| 643 | + |
| 644 | + |
def test_scan_directory_include_dir():
    """--include-dir restricts which directories are traversed.

    Scans the tests root with an include-dir pattern for ``data2`` and again
    with one for directories whose path ends in ``data``, then checks that
    the two scans report different scanned-file counts.

    NOTE(review): comparing counts only shows the two filters select
    different amounts of work; it presumes data/ and data2/ hold a different
    number of scannable files -- confirm against the fixture layout.
    """
    tests_root = f"{_root_path}/"

    # Only descend into data2/
    sf = ScanFilter(include_dir=[re.compile(r"data2")])
    sr = scan_directory_path(tests_root, scan_filter=sf)
    assert sr.scanned_files > 0

    # Verify data/ files are NOT included by scanning only data/ and comparing.
    # The character class accepts either path separator, so the pattern also
    # matches a native Windows path ending in "\data".
    sf_data_only = ScanFilter(include_dir=[re.compile(r"[\\/]data$")])
    sr_data = scan_directory_path(tests_root, scan_filter=sf_data_only)
    # data2 results should differ from data-only results
    assert sr.scanned_files != sr_data.scanned_files
| 657 | + |
| 658 | + |
def test_scan_directory_multiple_patterns():
    """Several include patterns are OR-ed: a file matching any one is scanned."""
    # One pattern per expected file; each targets a distinct file name.
    raw_patterns = (r"benign0_v3\.pkl$", r"benign0_v4\.pkl$")
    either_benign = ScanFilter(include=[re.compile(p) for p in raw_patterns])

    result = scan_directory_path(f"{_root_path}/data/", scan_filter=either_benign)

    # Both files are picked up and neither reports an issue.
    assert result.scanned_files == 2
    assert result.issues_count == 0
| 667 | + |
| 668 | + |
def test_scan_directory_no_filter():
    """An explicit ``scan_filter=None`` matches omitting the argument entirely."""
    data_dir = f"{_root_path}/data/"

    explicit_none = scan_directory_path(data_dir, scan_filter=None)
    implicit_default = scan_directory_path(data_dir)

    # Same number of files scanned and same number of issues reported.
    assert explicit_none.scanned_files == implicit_default.scanned_files
    assert explicit_none.issues_count == implicit_default.issues_count
| 675 | + |
| 676 | + |
def test_scan_directory_empty_filter():
    """A ScanFilter built with no patterns scans as many files as no filter."""
    data_dir = f"{_root_path}/data/"

    # Filter constructed with all defaults, i.e. no patterns supplied.
    with_empty_filter = scan_directory_path(data_dir, scan_filter=ScanFilter())
    without_filter = scan_directory_path(data_dir)

    assert with_empty_filter.scanned_files == without_filter.scanned_files
0 commit comments