Skip to content

Commit b3f7d07

Browse files
authored
refactor: Abstract general behaviour from TopPypiReference (#299)
1 parent 0f76196 commit b3f7d07

File tree

8 files changed

+72
-77
lines changed

8 files changed

+72
-77
lines changed

src/twyn/base/exceptions.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,3 @@ def __init__(self, message: str = "") -> None:
3030
def show(self, file: Optional[IO[Any]] = None) -> None:
3131
logger.debug(self.format_message(), exc_info=True)
3232
logger.error(self.format_message(), exc_info=False)
33-
34-
35-
class PackageNormalizingError(TwynError):
36-
"""Exception for when it is not possible to normalize a package name."""
37-
38-
message = "Failed to normalize pacakges."

src/twyn/base/utils.py

Lines changed: 0 additions & 15 deletions
This file was deleted.

src/twyn/main.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
SELECTOR_METHOD_MAPPING,
88
SelectorMethod,
99
)
10-
from twyn.base.utils import normalize_packages
1110
from twyn.config.config_handler import ConfigHandler
1211
from twyn.dependency_parser.dependency_selector import DependencySelector
1312
from twyn.file_handler.file_handler import FileHandler
@@ -68,9 +67,9 @@ def check_dependencies(
6867
selector=_get_candidate_selector(config.selector_method),
6968
threshold_class=SimilarityThreshold,
7069
)
71-
normalized_allowlist_packages = normalize_packages(config.allowlist)
70+
normalized_allowlist_packages = TopPyPiReference.normalize_packages(config.allowlist)
7271
dependencies = dependencies if dependencies else _get_parsed_dependencies_from_file(config.dependency_file)
73-
normalized_dependencies = normalize_packages(dependencies)
72+
normalized_dependencies = TopPyPiReference.normalize_packages(dependencies)
7473

7574
typos_list = TyposquatCheckResultList()
7675
dependencies_list = (

src/twyn/trusted_packages/cache_handler.py

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,6 @@
88

99
from pydantic import BaseModel, ValidationError, field_validator
1010

11-
from twyn.base.exceptions import PackageNormalizingError
12-
from twyn.base.utils import normalize_packages
1311
from twyn.file_handler.exceptions import PathIsNotFileError, PathNotFoundError
1412
from twyn.file_handler.file_handler import FileHandler
1513
from twyn.trusted_packages.constants import CACHE_DIR, TRUSTED_PACKAGES_MAX_RETENTION_DAYS
@@ -31,14 +29,6 @@ def validate_saved_date(cls, v: str) -> str:
3129
else:
3230
return v
3331

34-
@field_validator("packages")
35-
@classmethod
36-
def validate_packages(cls, v: set[str]) -> set[str]:
37-
try:
38-
return normalize_packages(v)
39-
except PackageNormalizingError as e:
40-
raise ValueError(f"Failed to normalize packages: {e}") from e
41-
4232

4333
class CacheHandler:
4434
"""Cache class that provides basic read/write/delete operation for individual source cache files."""

src/twyn/trusted_packages/exceptions.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,3 +29,9 @@ class InvalidCacheError(TwynError):
2929
"""Error for when the cache content is not valid."""
3030

3131
message = "Invalid cache content"
32+
33+
34+
class PackageNormalizingError(TwynError):
35+
"""Exception for when it is not possible to normalize a package name."""
36+
37+
message = "Failed to normalize pacakges."
Lines changed: 61 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,52 +1,56 @@
11
import logging
2-
from abc import ABC, abstractmethod
2+
import re
3+
from abc import abstractmethod
34
from datetime import datetime
45
from typing import Any, Union
56

67
import requests
8+
from typing_extensions import override
79

8-
from twyn.base.utils import normalize_packages
910
from twyn.trusted_packages.cache_handler import CacheEntry, CacheHandler
1011
from twyn.trusted_packages.exceptions import (
1112
EmptyPackagesListError,
1213
InvalidJSONError,
1314
InvalidPyPiFormatError,
15+
PackageNormalizingError,
1416
)
1517

1618
logger = logging.getLogger("twyn")
1719

1820

19-
class AbstractPackageReference(ABC):
20-
"""Represents a reference from where to retrieve trusted packages."""
21+
class AbstractPackageReference:
22+
"""Represents a reference from where to retrieve trusted packages.
23+
24+
It abstracts all the package-retrieval and caching logic.
25+
26+
It defines the `_parse` abstract method, so each subclass defines how to handle the fetched data.
27+
It defines the `normalize_packages` abstract method, so each subclass validates that the package names are correct.
28+
"""
2129

2230
def __init__(self, source: str, cache_handler: Union[CacheHandler, None] = None) -> None:
2331
self.source = source
2432
self.cache_handler = cache_handler
2533

34+
@staticmethod
2635
@abstractmethod
27-
def get_packages(self) -> set[str]:
28-
"""Return the names of the trusted packages available in the reference."""
29-
30-
31-
class TopPyPiReference(AbstractPackageReference):
32-
"""Top PyPi packages retrieved from an online source."""
33-
34-
def get_packages(self) -> set[str]:
35-
"""Download and parse online source of top Python Package Index packages."""
36-
packages_to_use = set()
37-
packages_to_use = self._get_packages_from_cache_if_enabled()
38-
# we don't save the cache here, we keep it as it is so the date remains the original one.
39-
40-
if not packages_to_use:
41-
# no cache usage, no cache hit (non-existent or outdated) or cache was empty.
42-
logger.info("Fetching trusted packages from PyPI reference...")
43-
packages_to_use = self._parse(self._download())
36+
def _parse(packages_json: dict[str, Any]) -> set[str]:
37+
"""Parse and retrieve the packages within the given json structure."""
4438

45-
# New packages were downloaded, we create a new entry updating all values.
46-
self._save_trusted_packages_to_cache_if_enabled(packages_to_use)
39+
@staticmethod
40+
@abstractmethod
41+
def normalize_packages(packages: set[str]) -> set[str]:
42+
"""Normalize package names to make sure they're valid within the package manager context."""
4743

48-
normalized_packages = normalize_packages(packages_to_use)
49-
return normalized_packages
44+
def _download(self) -> dict[str, Any]:
45+
packages = requests.get(self.source)
46+
packages.raise_for_status()
47+
try:
48+
packages_json: dict[str, Any] = packages.json()
49+
except requests.exceptions.JSONDecodeError as err:
50+
raise InvalidJSONError from err
51+
else:
52+
logger.debug("Successfully downloaded trusted packages list from %s", self.source)
53+
return packages_json
5054

5155
def _save_trusted_packages_to_cache_if_enabled(self, packages: set[str]) -> None:
5256
"""Save trusted packages using CacheHandler."""
@@ -67,18 +71,28 @@ def _get_packages_from_cache_if_enabled(self) -> set[str]:
6771

6872
return cache_entry.packages
6973

70-
def _download(self) -> dict[str, Any]:
71-
packages = requests.get(self.source)
72-
packages.raise_for_status()
73-
try:
74-
packages_json: dict[str, Any] = packages.json()
75-
except requests.exceptions.JSONDecodeError as err:
76-
raise InvalidJSONError from err
74+
def get_packages(self) -> set[str]:
75+
"""Download and parse online source of top Python Package Index packages."""
76+
packages_to_use = set()
77+
packages_to_use = self._get_packages_from_cache_if_enabled()
78+
# we don't save the cache here, we keep it as it is so the date remains the original one.
79+
80+
if not packages_to_use:
81+
# no cache usage, no cache hit (non-existent or outdated) or cache was empty.
82+
logger.info("Fetching trusted packages from PyPI reference...")
83+
packages_to_use = self._parse(self._download())
84+
85+
# New packages were downloaded, we create a new entry updating all values.
86+
self._save_trusted_packages_to_cache_if_enabled(packages_to_use)
7787

78-
logger.debug("Successfully downloaded trusted packages list from %s", self.source)
88+
normalized_packages = self.normalize_packages(packages_to_use)
89+
return normalized_packages
7990

80-
return packages_json
8191

92+
class TopPyPiReference(AbstractPackageReference):
93+
"""Top PyPi packages retrieved from an online source."""
94+
95+
@override
8296
@staticmethod
8397
def _parse(packages_info: dict[str, Any]) -> set[str]:
8498
try:
@@ -90,5 +104,17 @@ def _parse(packages_info: dict[str, Any]) -> set[str]:
90104
raise EmptyPackagesListError
91105

92106
logger.debug("Successfully parsed trusted packages list")
93-
94107
return names
108+
109+
@override
110+
@staticmethod
111+
def normalize_packages(packages: set[str]) -> set[str]:
112+
"""Normalize dependency names according to PyPi https://packaging.python.org/en/latest/specifications/name-normalization/."""
113+
renamed_packages = {re.sub(r"[-_.]+", "-", name).lower() for name in packages}
114+
115+
pattern = re.compile(r"^([a-z0-9]|[a-z0-9][a-z0-9._-]*[a-z0-9])\Z") # noqa: F821
116+
for package in renamed_packages:
117+
if not pattern.match(package):
118+
raise PackageNormalizingError(f"Package name '{package}' does not match required pattern")
119+
120+
return renamed_packages

tests/main/test_main.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -172,12 +172,12 @@ def test_check_dependencies_with_input_from_cli_accepts_multiple_dependencies(
172172
assert TyposquatCheckResult(dependency="reqests", similars=["requests"]) in error.errors
173173
assert TyposquatCheckResult(dependency="my-package", similars=["mypackage"]) in error.errors
174174

175-
@patch("twyn.main.TopPyPiReference")
175+
@patch("twyn.main.TopPyPiReference.get_packages")
176176
@patch("twyn.main._get_parsed_dependencies_from_file")
177177
def test_check_dependencies_ignores_package_in_allowlist(
178-
self, mock_get_parsed_dependencies_from_file: Mock, mock_top_pypi_reference: Mock
178+
self, mock_get_parsed_dependencies_from_file: Mock, mock_get_packages: Mock
179179
) -> None:
180-
mock_top_pypi_reference.return_value.get_packages.return_value = {"mypackage"}
180+
mock_get_packages.return_value = {"mypackage"}
181181
mock_get_parsed_dependencies_from_file.return_value = {"my-package"}
182182

183183
# Verify that before the whitelist configuration the package is classified as an error.

tests/trusted_packages/test_cache_handler.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -121,11 +121,6 @@ def test_is_entry_outdated_with_invalid_date_format(self) -> None:
121121
with pytest.raises(ValidationError):
122122
CacheEntry(saved_date="invalid-date-format", packages={"package1"})
123123

124-
def test_cache_entry_invalid_package_normalization(self) -> None:
125-
"""Test CacheEntry raises ValidationError when package normalization fails."""
126-
with pytest.raises(ValidationError):
127-
CacheEntry(saved_date="2024-01-01", packages={"../invalid-package"})
128-
129124
def test_clear_all_removes_all_cache_files(self, tmp_path: Path) -> None:
130125
"""Test clear_all removes all cache files and directory."""
131126
cache_handler = CacheHandler(str(tmp_path))

0 commit comments

Comments
 (0)