|
4 | 4 | """Tests for the FakeEmailAnalyzer heuristic.""" |
5 | 5 |
|
6 | 6 |
|
| 7 | +import os |
| 8 | +from pathlib import Path |
7 | 9 | from unittest.mock import MagicMock |
8 | 10 |
|
9 | 11 | import pytest |
10 | 12 |
|
| 13 | +from macaron.config.defaults import load_defaults |
11 | 14 | from macaron.errors import HeuristicAnalyzerValueError |
12 | 15 | from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult |
13 | 16 | from macaron.malware_analyzer.pypi_heuristics.metadata.fake_email import FakeEmailAnalyzer |
14 | | -from macaron.slsa_analyzer.package_registry.pypi_registry import PyPIPackageJsonAsset |
| 17 | + |
| 18 | +# If check_deliverability is True, the DNS-based check is enabled for email_validator. |
| 19 | +# If check_deliverability is True and no network is available, email_validator will still perform the DNS-based |
| 20 | +# check, but the DNS queries will time out. |
| 21 | +# email_validator doesn't treat a timeout as a validation failure: |
| 22 | +# https://github.com/JoshData/python-email-validator/blob/98800bac023b8713351393a5043034065f1ea6cb/email_validator/deliverability.py#L144 |
| 23 | +# Therefore, FakeEmailAnalyzer.is_valid_email doesn't return None, and we will treat this as a "valid email". |
| 24 | +# This has a risk of treating an invalid email as valid when an unexpected timeout occurs. |
| 25 | +# We have ensured that check_deliverability is always False for all unit test cases in this module, so that behavior |
| 26 | +# is not expected to happen. |
| 27 | + |
| 28 | + |
| 29 | +@pytest.fixture(name="fake_email_defaults_override") |
| 30 | +def set_defaults_(tmp_path: Path) -> None: |
| 31 | + """Disable check_deliverability in defaults.ini so we do not make network connections. |
| 32 | +
|
| 33 | + Parameters |
| 34 | + ---------- |
| 35 | + tmp_path: Path |
| 36 | + Pytest temporary path fixture. |
| 37 | + """ |
| 38 | + defaults_file = Path(os.path.join(tmp_path, "config.ini")) |
| 39 | + content = """ |
| 40 | + [heuristic.pypi] |
| 41 | + check_deliverability = False |
| 42 | + """ |
| 43 | + defaults_file.write_text(content, encoding="utf-8") |
| 44 | + assert load_defaults(str(defaults_file)) is True |
15 | 45 |
|
16 | 46 |
|
17 | 47 | @pytest.fixture(name="analyzer") |
18 | | -def analyzer_() -> FakeEmailAnalyzer: |
| 48 | +def analyzer_(fake_email_defaults_override: None) -> FakeEmailAnalyzer: # pylint: disable=unused-argument |
19 | 49 | """Pytest fixture to create a FakeEmailAnalyzer instance.""" |
20 | 50 | return FakeEmailAnalyzer() |
21 | 51 |
|
22 | 52 |
|
23 | | -@pytest.fixture(name="pypi_package_json_asset_mock") |
24 | | -def pypi_package_json_asset_mock_() -> MagicMock: |
25 | | - """Pytest fixture for a mock PyPIPackageJsonAsset.""" |
26 | | - mock_asset = MagicMock(spec=PyPIPackageJsonAsset) |
27 | | - mock_asset.package_json = {} |
28 | | - return mock_asset |
29 | | - |
30 | | - |
31 | | -def test_analyze_skip_no_emails_present(analyzer: FakeEmailAnalyzer, pypi_package_json_asset_mock: MagicMock) -> None: |
32 | | - """Test the analyzer skips if no author_email or maintainer_email is present.""" |
33 | | - pypi_package_json_asset_mock.package_json = {"info": {"author_email": None, "maintainer_email": None}} |
34 | | - result, info = analyzer.analyze(pypi_package_json_asset_mock) |
| 53 | +def test_missing_info(pypi_package_json: MagicMock, analyzer: FakeEmailAnalyzer) -> None: |
| 54 | + """Test when JSON 'info' key is missing in the PyPI data (should error). |
| 55 | +
|
| 56 | + Parameters |
| 57 | + ---------- |
| 58 | + pypi_package_json: MagicMock |
| 59 | + The PyPIPackageJsonAsset MagicMock fixture. |
| 60 | + analyzer: FakeEmailAnalyzer |
| 61 | + An initialized FakeEmailAnalyzer instance. |
| 62 | + """ |
| 63 | + pypi_package_json.package_json = {} # No 'info' key |
| 64 | + with pytest.raises(HeuristicAnalyzerValueError): |
| 65 | + analyzer.analyze(pypi_package_json) |
| 66 | + |
| 67 | + |
| 68 | +def test_no_emails_present(pypi_package_json: MagicMock, analyzer: FakeEmailAnalyzer) -> None: |
| 69 | + """Test when no author_email or maintainer_email is present (should skip). |
| 70 | +
|
| 71 | + Parameters |
| 72 | + ---------- |
| 73 | + pypi_package_json: MagicMock |
| 74 | + The PyPIPackageJsonAsset MagicMock fixture. |
| 75 | + analyzer: FakeEmailAnalyzer |
| 76 | + An initialized FakeEmailAnalyzer instance. |
| 77 | + """ |
| 78 | + pypi_package_json.package_json = {"info": {"author_email": None, "maintainer_email": None}} |
| 79 | + result, _ = analyzer.analyze(pypi_package_json) |
35 | 80 | assert result == HeuristicResult.SKIP |
36 | | - assert info["message"] == "No author or maintainer email available." |
37 | | - |
38 | | - |
39 | | -def test_analyze_raises_error_for_missing_info_key( |
40 | | - analyzer: FakeEmailAnalyzer, pypi_package_json_asset_mock: MagicMock |
41 | | -) -> None: |
42 | | - """Test the analyzer raises an error if the 'info' key is missing in the PyPI data.""" |
43 | | - pypi_package_json_asset_mock.package_json = {} # No 'info' key |
44 | | - with pytest.raises(HeuristicAnalyzerValueError) as exc_info: |
45 | | - analyzer.analyze(pypi_package_json_asset_mock) |
46 | | - assert "No package info available." in str(exc_info.value) |
47 | | - |
48 | | - |
49 | | -def test_analyze_fail_no_email_found_in_field( |
50 | | - analyzer: FakeEmailAnalyzer, pypi_package_json_asset_mock: MagicMock |
51 | | -) -> None: |
52 | | - """Test the analyzer fails if an email field does not contain a parsable email address.""" |
53 | | - pypi_package_json_asset_mock.package_json = {"info": {"author_email": "not an email", "maintainer_email": None}} |
54 | | - result, info = analyzer.analyze(pypi_package_json_asset_mock) |
55 | | - assert result == HeuristicResult.FAIL |
56 | | - assert info == {"message": "no emails found in the email field"} |
57 | 81 |
|
58 | 82 |
|
59 | | -def test_analyze_fail_invalid_email(analyzer: FakeEmailAnalyzer, pypi_package_json_asset_mock: MagicMock) -> None: |
60 | | - """Test analyzer fails if the email field contains an invalid email format.""" |
61 | | - invalid_email = "user@example" |
62 | | - pypi_package_json_asset_mock.package_json = {"info": {"author_email": invalid_email, "maintainer_email": None}} |
| 83 | +def test_non_email(pypi_package_json: MagicMock, analyzer: FakeEmailAnalyzer) -> None: |
| 84 | + """Test with a non-parsable email address (should fail). |
63 | 85 |
|
64 | | - result, info = analyzer.analyze(pypi_package_json_asset_mock) |
| 86 | + Parameters |
| 87 | + ---------- |
| 88 | + pypi_package_json: MagicMock |
| 89 | + The PyPIPackageJsonAsset MagicMock fixture. |
| 90 | + analyzer: FakeEmailAnalyzer |
| 91 | + An initialized FakeEmailAnalyzer instance. |
| 92 | + """ |
| 93 | + pypi_package_json.package_json = {"info": {"author_email": "not an email", "maintainer_email": "also not an email"}} |
| 94 | + result, info = analyzer.analyze(pypi_package_json) |
65 | 95 | assert result == HeuristicResult.FAIL |
66 | | - assert info == {"message": "no emails found in the email field"} |
67 | | - |
68 | 96 |
|
69 | | -def test_analyze_pass_only_maintainer_email_valid( |
70 | | - analyzer: FakeEmailAnalyzer, pypi_package_json_asset_mock: MagicMock |
71 | | -) -> None: |
72 | | - """Test the analyzer passes if only a valid maintainer_email is present and deliverability is not checked.""" |
73 | | - |
74 | | - pypi_package_json_asset_mock.package_json = {"info": {"author_email": None, "maintainer_email": email}} |
75 | | - result, info = analyzer.analyze(pypi_package_json_asset_mock) |
| 97 | + # assert types (for mypy) |
| 98 | + assert isinstance(info["non_emails"], list) |
76 | 99 |
|
77 | | - if analyzer.check_deliverability: |
78 | | - assert result == HeuristicResult.FAIL |
79 | | - assert info == {"invalid_email": email} |
80 | | - return |
81 | | - |
82 | | - assert result == HeuristicResult.PASS |
83 | | - assert info["validated_emails"] == [ |
84 | | - { "normalized": "[email protected]", "local_part": "maintainer", "domain": "example.net"} |
85 | | - ] |
| 100 | + assert "not an email" in info["non_emails"] |
| 101 | + assert "also not an email" in info["non_emails"] |
86 | 102 |
|
87 | 103 |
|
88 | | -def test_analyze_pass_both_emails_valid(analyzer: FakeEmailAnalyzer, pypi_package_json_asset_mock: MagicMock) -> None: |
89 | | - """Test the analyzer passes if both emails are valid and deliverability is not checked.""" |
90 | | - author_email = "[email protected]" |
91 | | - author_local_part, author_domain = author_email.split("@") |
92 | | - maintainer_email = "[email protected]" |
93 | | - maintainer_local_part, maintainer_domain = maintainer_email.split("@") |
| 104 | +def test_valid_email(pypi_package_json: MagicMock, analyzer: FakeEmailAnalyzer) -> None: |
| 105 | + """Test with valid email address format (should pass). |
94 | 106 |
|
95 | | - pypi_package_json_asset_mock.package_json = { |
96 | | - "info": {"author_email": author_email, "maintainer_email": maintainer_email} |
| 107 | + Parameters |
| 108 | + ---------- |
| 109 | + pypi_package_json: MagicMock |
| 110 | + The PyPIPackageJsonAsset MagicMock fixture. |
| 111 | + analyzer: FakeEmailAnalyzer |
| 112 | + An initialized FakeEmailAnalyzer instance. |
| 113 | + """ |
| 114 | + pypi_package_json.package_json = { |
| 115 | + "info": { |
| 116 | + "author_email": "[email protected]", |
| 117 | + "maintainer_email": "[email protected]", |
| 118 | + } |
97 | 119 | } |
98 | | - result, info = analyzer.analyze(pypi_package_json_asset_mock) |
99 | | - if analyzer.check_deliverability: |
100 | | - assert result == HeuristicResult.FAIL |
101 | | - assert info == {"invalid_email": maintainer_email} |
102 | | - return |
103 | | - |
| 120 | + result, info = analyzer.analyze(pypi_package_json) |
104 | 121 | assert result == HeuristicResult.PASS |
105 | 122 |
|
106 | | - validated_emails = info.get("validated_emails") |
107 | | - assert isinstance(validated_emails, list) |
108 | | - assert len(validated_emails) == 2 |
109 | | - assert {"normalized": author_email, "local_part": author_local_part, "domain": author_domain} in validated_emails |
110 | | - assert { |
111 | | - "normalized": maintainer_email, |
112 | | - "local_part": maintainer_local_part, |
113 | | - "domain": maintainer_domain, |
114 | | - } in validated_emails |
115 | | - |
| 123 | + # assert types (for mypy) |
| 124 | + assert isinstance(info["valid_emails"], list) |
116 | 125 |
|
117 | | -def test_is_valid_email_failure(analyzer: FakeEmailAnalyzer) -> None: |
118 | | - """Test is_valid_email returns None on failure.""" |
119 | | - result = analyzer.is_valid_email("invalid-email") |
120 | | - assert result is None |
| 126 | + assert "[email protected]" in info[ "valid_emails"] |
| 127 | + assert "[email protected]" in info[ "valid_emails"] |
121 | 128 |
|
122 | 129 |
|
123 | 130 | def test_get_emails(analyzer: FakeEmailAnalyzer) -> None: |
124 | | - """Test the get_emails method.""" |
125 | | - |
126 | | - |
| 131 | + """Test the get_emails method extracts emails from text correctly. |
| 132 | +
|
| 133 | + analyzer: FakeEmailAnalyzer |
| 134 | + An initialized FakeEmailAnalyzer instance. |
| 135 | + """ |
| 136 | + email_field = "[email protected], Another User <[email protected]>, please also email [email protected] thanks!" |
| 137 | + |
127 | 138 | assert analyzer.get_emails(email_field) == expected |
128 | 139 |
|
129 | 140 | email_field_no_email = "this is not an email" |
|
0 commit comments