diff --git a/src/twyn/dependency_managers/__init__.py b/src/twyn/dependency_managers/__init__.py index 8b13789..e69de29 100644 --- a/src/twyn/dependency_managers/__init__.py +++ b/src/twyn/dependency_managers/__init__.py @@ -1 +0,0 @@ - diff --git a/src/twyn/dependency_managers/managers.py b/src/twyn/dependency_managers/managers.py index 592ef17..60c226d 100644 --- a/src/twyn/dependency_managers/managers.py +++ b/src/twyn/dependency_managers/managers.py @@ -9,6 +9,9 @@ UV_LOCK, YARN_LOCK, ) +from twyn.trusted_packages.managers.base import TrustedPackagesProtocol +from twyn.trusted_packages.managers.trusted_npm_packages_manager import TrustedNpmPackageManager +from twyn.trusted_packages.managers.trusted_pypi_packages_manager import TrustedPackages from twyn.trusted_packages.references.base import AbstractPackageReference from twyn.trusted_packages.references.top_npm_reference import TopNpmReference from twyn.trusted_packages.references.top_pypi_reference import TopPyPiReference @@ -30,6 +33,9 @@ class DependencyManager: dependency_files: set[str] """Set of supported dependency file names.""" + trusted_packages_manager: type[TrustedPackagesProtocol] + """TrustedPackages class that will determine if there's a typo or not.""" + def matches_dependency_file(self, dependency_file: str) -> bool: """Check if this manager can handle the given dependency file.""" return Path(dependency_file).name in self.dependency_files @@ -43,13 +49,16 @@ def get_alternative_source(self, sources: dict[str, str]) -> str | None: name="npm", trusted_packages_source=TopNpmReference, dependency_files={PACKAGE_LOCK_JSON, YARN_LOCK}, + trusted_packages_manager=TrustedNpmPackageManager, ) pypi_dependency_manager = DependencyManager( name="pypi", trusted_packages_source=TopPyPiReference, dependency_files={UV_LOCK, POETRY_LOCK, REQUIREMENTS_TXT}, + trusted_packages_manager=TrustedPackages, ) + DEPENDENCY_MANAGERS: list[DependencyManager] = [pypi_dependency_manager, npm_dependency_manager] """List of 
available dependency manager classes.""" diff --git a/src/twyn/main.py b/src/twyn/main.py index 46bf110..66ec764 100644 --- a/src/twyn/main.py +++ b/src/twyn/main.py @@ -20,13 +20,13 @@ from twyn.similarity.algorithm import EditDistance, SimilarityThreshold from twyn.trusted_packages.cache_handler import CacheHandler from twyn.trusted_packages.exceptions import InvalidArgumentsError +from twyn.trusted_packages.managers.base import TrustedPackagesProtocol from twyn.trusted_packages.models import ( TyposquatCheckResultEntry, TyposquatCheckResultFromSource, TyposquatCheckResults, ) from twyn.trusted_packages.references.base import AbstractPackageReference -from twyn.trusted_packages.trusted_packages import TrustedPackages logger = logging.getLogger("twyn") logger.addHandler(logging.NullHandler()) @@ -134,7 +134,7 @@ def _analyze_dependencies_from_input( dependency_manager = get_dependency_manager_from_name(package_ecosystem) source = dependency_manager.get_alternative_source({"pypi": pypi_source, "npm": npm_source}) top_package_reference = dependency_manager.trusted_packages_source(source, maybe_cache_handler) - trusted_packages = TrustedPackages( + trusted_packages = dependency_manager.trusted_packages_manager( names=top_package_reference.get_packages(), algorithm=EditDistance(), selector=selector_method, @@ -177,7 +177,7 @@ def _analyze_packages_from_source( top_package_reference = manager.trusted_packages_source(source, maybe_cache_handler) packages_from_source = top_package_reference.get_packages() - trusted_packages = TrustedPackages( + trusted_packages = manager.trusted_packages_manager( names=packages_from_source, algorithm=EditDistance(), selector=selector_method, @@ -200,7 +200,7 @@ def _analyze_packages_from_source( def _analyze_dependencies( top_package_reference: AbstractPackageReference, - trusted_packages: TrustedPackages, + trusted_packages: TrustedPackagesProtocol, packages: set[str], allowlist: set[str], show_progress_bar: bool, diff --git 
a/src/twyn/trusted_packages/__init__.py b/src/twyn/trusted_packages/__init__.py index a43e78b..2240a18 100644 --- a/src/twyn/trusted_packages/__init__.py +++ b/src/twyn/trusted_packages/__init__.py @@ -1,5 +1,6 @@ +from twyn.trusted_packages.managers.trusted_npm_packages_manager import TrustedNpmPackageManager +from twyn.trusted_packages.managers.trusted_pypi_packages_manager import TrustedPackages from twyn.trusted_packages.references.top_npm_reference import TopNpmReference from twyn.trusted_packages.references.top_pypi_reference import TopPyPiReference -from twyn.trusted_packages.trusted_packages import TrustedPackages -__all__ = ["TopPyPiReference", "TrustedPackages", "TopNpmReference"] +__all__ = ["TopPyPiReference", "TopNpmReference", "TrustedPackages", "TrustedNpmPackageManager"] diff --git a/src/twyn/trusted_packages/managers/__init__.py b/src/twyn/trusted_packages/managers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/twyn/trusted_packages/managers/base.py b/src/twyn/trusted_packages/managers/base.py new file mode 100644 index 0000000..1bbba49 --- /dev/null +++ b/src/twyn/trusted_packages/managers/base.py @@ -0,0 +1,13 @@ +from collections import defaultdict +from typing import Any, Protocol + +from twyn.trusted_packages.models import TyposquatCheckResultEntry + +OrderedPackages = defaultdict[str, set[str]] +"""Type alias for mapping package names by ecosystem.""" + + +class TrustedPackagesProtocol(Protocol): + def __contains__(self, obj: Any) -> bool: ... + + def get_typosquat(self, package_name: str) -> TyposquatCheckResultEntry: ... 
diff --git a/src/twyn/trusted_packages/managers/trusted_npm_packages_manager.py b/src/twyn/trusted_packages/managers/trusted_npm_packages_manager.py new file mode 100644 index 0000000..3dcb20c --- /dev/null +++ b/src/twyn/trusted_packages/managers/trusted_npm_packages_manager.py @@ -0,0 +1,77 @@ +from collections import defaultdict +from typing import Any + +from twyn.similarity.algorithm import ( + AbstractSimilarityAlgorithm, + SimilarityThreshold, +) +from twyn.trusted_packages.managers.base import OrderedPackages +from twyn.trusted_packages.models import TyposquatCheckResultEntry +from twyn.trusted_packages.selectors import AbstractSelector + + +class TrustedNpmPackageManager: + """Representation of namespaces that can be trusted.""" + + def __init__( + self, + names: set[str], + algorithm: AbstractSimilarityAlgorithm, + selector: AbstractSelector, + threshold_class: type[SimilarityThreshold], + ) -> None: + self.packages, self.namespaces = self._create_names_dictionary(names) + + self.threshold_class = threshold_class + self.selector = selector + self.algorithm = algorithm + + def __contains__(self, obj: Any) -> bool: + """Check if an object exists in the trusted namespaces.""" + if isinstance(obj, str): + return obj in self.packages[obj[0]] or obj in self.namespaces + return False + + def _create_names_dictionary(self, names: set[str]) -> tuple[OrderedPackages, OrderedPackages]: + """Create a dictionary which will group all packages that start with the same letter under the same key.""" + first_letter_names: OrderedPackages = defaultdict(set) + namespaces: OrderedPackages = defaultdict(set) + for name in names: + if name.startswith("@"): + namespace, dependency = name.split("/") + namespaces[namespace].add(dependency) + else: + first_letter_names[name[0]].add(name) + return first_letter_names, namespaces + + def _get_typosquats_from_namespace_dependency(self, package_name: str) -> TyposquatCheckResultEntry: + namespace, dependency = package_name.split("/") + 
threshold = self.threshold_class.from_name(namespace) + typosquat_result = TyposquatCheckResultEntry(dependency=package_name) + for trusted_namespace_name in self.selector.select_similar_names( + names={"@": self.namespaces.keys()}, name=namespace + ): + distance = self.algorithm.get_distance(namespace, trusted_namespace_name) + if threshold.is_inside_threshold(distance) and dependency in self.namespaces[trusted_namespace_name]: + typosquat_result.add(f"{trusted_namespace_name}/{dependency}") + return typosquat_result + + def _get_typosquats_from_dependency(self, package_name: str) -> TyposquatCheckResultEntry: + threshold = self.threshold_class.from_name(package_name) + typosquat_result = TyposquatCheckResultEntry(dependency=package_name) + for trusted_package_name in self.selector.select_similar_names(names=self.packages, name=package_name): + distance = self.algorithm.get_distance(package_name, trusted_package_name) + if threshold.is_inside_threshold(distance): + typosquat_result.add(trusted_package_name) + return typosquat_result + + def get_typosquat(self, package_name: str) -> TyposquatCheckResultEntry: + """Check if a given package name is similar to any trusted package and returns it. + + Only if there is a match on the first letter can a package name be + considered similar to another one. The algorithm provided and the threshold + are used to determine if the package name can be considered similar.
+ """ + if package_name.startswith("@"): + return self._get_typosquats_from_namespace_dependency(package_name) + return self._get_typosquats_from_dependency(package_name) diff --git a/src/twyn/trusted_packages/trusted_packages.py b/src/twyn/trusted_packages/managers/trusted_pypi_packages_manager.py similarity index 85% rename from src/twyn/trusted_packages/trusted_packages.py rename to src/twyn/trusted_packages/managers/trusted_pypi_packages_manager.py index f8e6c5d..91759d1 100644 --- a/src/twyn/trusted_packages/trusted_packages.py +++ b/src/twyn/trusted_packages/managers/trusted_pypi_packages_manager.py @@ -5,12 +5,10 @@ AbstractSimilarityAlgorithm, SimilarityThreshold, ) +from twyn.trusted_packages.managers.base import OrderedPackages from twyn.trusted_packages.models import TyposquatCheckResultEntry from twyn.trusted_packages.selectors import AbstractSelector -_PackageNames = defaultdict[str, set[str]] -"""Type alias for mapping package names by ecosystem.""" - class TrustedPackages: """Representation of packages that can be trusted.""" @@ -22,7 +20,7 @@ def __init__( selector: AbstractSelector, threshold_class: type[SimilarityThreshold], ) -> None: - self.names: _PackageNames = self._create_names_dictionary(names) + self.names = self._create_names_dictionary(names) self.threshold_class = threshold_class self.selector = selector self.algorithm = algorithm @@ -34,17 +32,14 @@ def __contains__(self, obj: Any) -> bool: return False @staticmethod - def _create_names_dictionary(names: set[str]) -> _PackageNames: + def _create_names_dictionary(names: set[str]) -> OrderedPackages: """Create a dictionary which will group all packages that start with the same letter under the same key.""" first_letter_names = defaultdict(set) for name in names: first_letter_names[name[0]].add(name) return first_letter_names - def get_typosquat( - self, - package_name: str, - ) -> TyposquatCheckResultEntry: + def get_typosquat(self, package_name: str) -> TyposquatCheckResultEntry:
"""Check if a given package name is similar to any trusted package and returns it. Only if there is a match on the first letter can a package name be diff --git a/src/twyn/trusted_packages/references/base.py b/src/twyn/trusted_packages/references/base.py index ebf7068..78cf5e4 100644 --- a/src/twyn/trusted_packages/references/base.py +++ b/src/twyn/trusted_packages/references/base.py @@ -1,5 +1,7 @@ import logging from abc import abstractmethod +from collections.abc import Iterator +from dataclasses import dataclass, field from datetime import datetime from typing import Any @@ -14,6 +16,35 @@ logger = logging.getLogger("twyn") +@dataclass +class NormalizedPackages: + packages: set[str] + namespaces: dict[str, set[str]] | None = None + _raw_namespaces: set[str] = field(default_factory=set) + + def __post_init__(self) -> None: + if self.namespaces: + for namespace in self.namespaces: + for package_name in self.namespaces[namespace]: + self._raw_namespaces.add(f"{namespace}/{package_name}") + + def __iter__(self) -> Iterator[str]: + yield from self.packages + + if not self.namespaces: + return + + for namespace in self.namespaces: + for package_name in self.namespaces[namespace]: + yield f"{namespace}/{package_name}" + + def __contains__(self, value: str) -> bool: + if not isinstance(value, str): + return False + + return value in self.packages or value in self._raw_namespaces + + class AbstractPackageReference: """Represents a reference from where to retrieve trusted packages.
@@ -32,7 +63,7 @@ def __init__(self, source: str | None = None, cache_handler: CacheHandler | None @staticmethod @abstractmethod - def normalize_packages(packages: set[str]) -> set[str]: + def normalize_packages(packages: set[str]) -> NormalizedPackages: """Normalize package names to make sure they're valid within the package manager context.""" def _download(self) -> dict[str, Any]: @@ -64,7 +95,7 @@ def _get_packages_from_cache_if_enabled(self) -> set[str]: return cache_entry.packages - def get_packages(self) -> set[str]: + def get_packages(self) -> NormalizedPackages: """Download and parse online source of top packages from the package ecosystem.""" packages = self._get_packages_from_cache_if_enabled() # we don't save the cache here, we keep it as it is so the date remains the original one. @@ -84,5 +115,4 @@ def get_packages(self) -> set[str]: # New packages were downloaded, we create a new entry updating all values. self._save_trusted_packages_to_cache_if_enabled(packages) - normalized_packages = self.normalize_packages(packages) - return normalized_packages + return self.normalize_packages(packages) diff --git a/src/twyn/trusted_packages/references/top_npm_reference.py b/src/twyn/trusted_packages/references/top_npm_reference.py index e6ca71c..f6f293b 100644 --- a/src/twyn/trusted_packages/references/top_npm_reference.py +++ b/src/twyn/trusted_packages/references/top_npm_reference.py @@ -6,7 +6,7 @@ from twyn.trusted_packages.exceptions import ( PackageNormalizingError, ) -from twyn.trusted_packages.references.base import AbstractPackageReference +from twyn.trusted_packages.references.base import AbstractPackageReference, NormalizedPackages logger = logging.getLogger("twyn") @@ -21,15 +21,28 @@ class TopNpmReference(AbstractPackageReference): @override @staticmethod - def normalize_packages(packages: set[str]) -> set[str]: + def normalize_packages(packages: set[str]) -> NormalizedPackages: """Normalize dependency names according to npm.""" if not packages: 
logger.debug("Tried to normalize packages, but none were provided") - return set() + return NormalizedPackages(packages=set()) + + # Extract namespaces from package names + package_pattern = re.compile(r"^[a-z0-9-~][a-z0-9-._~]*$") # noqa: F821 + namespace_pattern = re.compile(r"^(?:@[a-z0-9-~][a-z0-9-._~]*)\/[a-z0-9-~][a-z0-9-._~]*$") # noqa: F821 + + extracted_namespaces: dict[str, set[str]] = {} + regular_packages = set() - pattern = re.compile(r"^(?:@[a-z0-9-~][a-z0-9-._~]*\/)?[a-z0-9-~][a-z0-9-._~]*$") # noqa: F821 for package in packages: - if not pattern.match(package.lower()): + if namespace_pattern.match(package.lower()): + namespace, namespace_package = package.split("/") + if namespace not in extracted_namespaces: + extracted_namespaces[namespace] = set() + extracted_namespaces[namespace].add(namespace_package) + elif package_pattern.match(package.lower()): + regular_packages.add(package) + else: raise PackageNormalizingError(f"Package name '{package}' does not match required pattern") - return packages + return NormalizedPackages(packages=regular_packages, namespaces=extracted_namespaces) diff --git a/src/twyn/trusted_packages/references/top_pypi_reference.py b/src/twyn/trusted_packages/references/top_pypi_reference.py index 671c1bc..4672d1f 100644 --- a/src/twyn/trusted_packages/references/top_pypi_reference.py +++ b/src/twyn/trusted_packages/references/top_pypi_reference.py @@ -6,7 +6,7 @@ from twyn.trusted_packages.exceptions import ( PackageNormalizingError, ) -from twyn.trusted_packages.references.base import AbstractPackageReference +from twyn.trusted_packages.references.base import AbstractPackageReference, NormalizedPackages logger = logging.getLogger("twyn") @@ -21,11 +21,11 @@ class TopPyPiReference(AbstractPackageReference): @override @staticmethod - def normalize_packages(packages: set[str]) -> set[str]: + def normalize_packages(packages: set[str]) -> NormalizedPackages: """Normalize dependency names according to PyPi 
https://packaging.python.org/en/latest/specifications/name-normalization/.""" if not packages: logger.debug("Tried to normalize packages, but none were provided") - return set() + return NormalizedPackages(packages=set()) renamed_packages = {re.sub(r"[-_.]+", "-", name).lower() for name in packages} pattern = re.compile(r"^([a-z0-9]|[a-z0-9][a-z0-9._-]*[a-z0-9])\Z") # noqa: F821 @@ -33,4 +33,4 @@ def normalize_packages(packages: set[str]) -> set[str]: if not pattern.match(package): raise PackageNormalizingError(f"Package name '{package}' does not match required pattern") - return renamed_packages + return NormalizedPackages(packages=renamed_packages) diff --git a/src/twyn/trusted_packages/selectors.py b/src/twyn/trusted_packages/selectors.py index 7d97a6b..2ebd56b 100644 --- a/src/twyn/trusted_packages/selectors.py +++ b/src/twyn/trusted_packages/selectors.py @@ -10,14 +10,14 @@ if TYPE_CHECKING: from collections.abc import Iterable - from twyn.trusted_packages.trusted_packages import _PackageNames + from twyn.trusted_packages.managers.trusted_pypi_packages_manager import OrderedPackages logger = logging.getLogger("twyn") class AbstractSelector(ABC): @abstractmethod - def select_similar_names(self, names: _PackageNames, name: str) -> Iterable[str]: + def select_similar_names(self, names: OrderedPackages, name: str) -> Iterable[str]: """Override this to select names that are similar to the provided one.""" def __str__(self) -> str: @@ -28,7 +28,7 @@ def __str__(self) -> str: class FirstLetterNearbyInKeyboard(AbstractSelector): """Selects names that start with a letter that is nearby in an English Keyboard.""" - def select_similar_names(self, names: _PackageNames, name: str) -> Iterable[str]: + def select_similar_names(self, names: OrderedPackages, name: str) -> Iterable[str]: """Select package names with first letters nearby on keyboard.""" candidate_characters = self._get_candidate_characters(name[0]) for letter in candidate_characters: @@ -46,7 +46,7 @@ def 
_get_candidate_characters(character: str) -> list[str]: class FirstLetterExact(AbstractSelector): """Selects names that share the same first letter.""" - def select_similar_names(self, names: _PackageNames, name: str) -> Iterable[str]: + def select_similar_names(self, names: OrderedPackages, name: str) -> Iterable[str]: """Select package names that start with the same letter.""" yield from names[name[0]] @@ -54,7 +54,7 @@ def select_similar_names(self, names: _PackageNames, name: str) -> Iterable[str] class AllSimilar(AbstractSelector): """Consider all names to be similar.""" - def select_similar_names(self, names: _PackageNames, name: str) -> Iterable[str]: + def select_similar_names(self, names: OrderedPackages, name: str) -> Iterable[str]: """Return all available package names as candidates.""" - for candidates in names.values(): - yield from candidates + for candidates in names: + yield from names[candidates] diff --git a/tests/conftest.py b/tests/conftest.py index 02bdb08..31f0836 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -452,6 +452,52 @@ def yarn_lock_file_v2(tmp_path: Path) -> Iterator[Path]: yield tmp_file +@pytest.fixture +def package_lock_json_file_with_namespace_typo(tmp_path: Path) -> Iterator[Path]: + """NPM package-lock.json file with both namespace and regular package typos.""" + package_lock_file = tmp_path / "package-lock.json" + data = """{ + "name": "test-project", + "version": "1.0.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "test-project", + "version": "1.0.0", + "dependencies": { + "@awz/sdk": "3.450.0", + "@aws/sdk": "3.450.0", + "@awz/zdk": "1.2.3", + "lodas": "1.2.3" + } + }, + "node_modules/@awz/sdk": { + "version": "3.450.0", + "resolved": "https://registry.npmjs.org/@awz/sdk/-/sdk-3.450.0.tgz", + "integrity": "sha512-fake-integrity-hash" + }, + "node_modules/@aws/sdk": { + "version": "3.450.0", + "resolved": "https://registry.npmjs.org/@awz/sdk/-/sdk-3.450.0.tgz", + "integrity": 
"sha512-fake-integrity-hash" + }, + "node_modules/@awz/zdk": { + "version": "3.450.0", + "resolved": "https://registry.npmjs.org/@awz/sdk/-/sdk-3.450.0.tgz", + "integrity": "sha512-fake-integrity-hash" + }, + "node_modules/lodas": { + "version": "4.17.21", + "resolved": "https://registry.npmjs.org/lodas/-/lodas-4.17.21.tgz", + "integrity": "sha512-another-fake-hash" + } + } + }""" + with create_tmp_file(package_lock_file, data) as tmp_file: + yield tmp_file + + @pytest.fixture(autouse=True) def fail_on_requests_get(request) -> Generator[None, Any, None]: with mock.patch("requests.get") as m_get: diff --git a/tests/dependency_managers/test_dependency_managers.py b/tests/dependency_managers/test_dependency_managers.py index 3cc947b..c1d8e6c 100644 --- a/tests/dependency_managers/test_dependency_managers.py +++ b/tests/dependency_managers/test_dependency_managers.py @@ -3,7 +3,10 @@ from twyn.dependency_managers.managers import DependencyManager manager = DependencyManager( - name="pypi", trusted_packages_source=Mock(), dependency_files={"requirements.txt", "poetry.lock"} + name="pypi", + trusted_packages_source=Mock(), + dependency_files={"requirements.txt", "poetry.lock"}, + trusted_packages_manager=Mock(), ) diff --git a/tests/main/test_main.py b/tests/main/test_main.py index eca9259..e4b66fc 100644 --- a/tests/main/test_main.py +++ b/tests/main/test_main.py @@ -183,6 +183,31 @@ def test_check_dependencies_detects_typosquats_from_file_and_language_is_set( ] ) + @patch("twyn.trusted_packages.TopNpmReference.get_packages") + def test_check_dependencies_detects_namespace_typosquats_from_file( + self, mock_get_packages: Mock, package_lock_json_file_with_namespace_typo: Path + ) -> None: + """Check that both namespace and regular package typosquats are detected when reading dependencies from file.""" + mock_get_packages.return_value = {"@aws/sdk", "lodash"} + + error = check_dependencies( + dependency_files={str(package_lock_json_file_with_namespace_typo)}, + 
use_cache=False, + ) + + assert mock_get_packages.call_count == 1 + assert error == TyposquatCheckResults( + results=[ + TyposquatCheckResultFromSource( + errors=[ + TyposquatCheckResultEntry(dependency="lodas", similars=["lodash"]), + TyposquatCheckResultEntry(dependency="@awz/sdk", similars=["@aws/sdk"]), + ], + source=str(package_lock_json_file_with_namespace_typo), + ) + ] + ) + @patch("twyn.trusted_packages.TopPyPiReference.get_packages") @patch("twyn.main._get_config") @patch("twyn.dependency_parser.parsers.abstract_parser.Path") @@ -274,6 +299,35 @@ def test_check_dependencies_with_input_from_cli_detects_typosquats( ] ) + @patch("twyn.trusted_packages.TopNpmReference._get_packages_from_cache_if_enabled") + def test_check_dependencies_with_input_from_cli_detects_typosquats_on_namespace_packages( + self, mock_get_packages_from_cache: Mock + ) -> None: + """Test that typosquats can be detected on npm packages with namespaces (@scope/package format).""" + mock_get_packages_from_cache.return_value = {"@aws/sdk", "@react/core", "lodash"} + error = check_dependencies( + dependencies={ + "@awz/sdk", # Typosquat of @aws/sdk + "@aws/sdk", # not a typo + "@aws/zdk", # Not a typo, since namespace is a trusted one + "@awz/zdk", # Not a typo, even if namespace is similar to a trusted one, the package is not similar to any known one.
+ "lodas", + }, + package_ecosystem="npm", + ) + + assert error == TyposquatCheckResults( + results=[ + TyposquatCheckResultFromSource( + errors=[ + TyposquatCheckResultEntry(dependency="lodas", similars=["lodash"]), + TyposquatCheckResultEntry(dependency="@awz/sdk", similars=["@aws/sdk"]), + ], + source="manual_input", + ) + ] + ) + @patch("twyn.trusted_packages.TopPyPiReference._get_packages_from_cache_if_enabled") def test_check_dependencies_recursive_and_dependency_file_set( self, mock_get_packages_from_cache: Mock, uv_lock_file_with_typo: Path diff --git a/tests/trusted_packages/test_references.py b/tests/trusted_packages/test_references.py index 115b460..e6d7954 100644 --- a/tests/trusted_packages/test_references.py +++ b/tests/trusted_packages/test_references.py @@ -13,7 +13,7 @@ InvalidJSONError, PackageNormalizingError, ) -from twyn.trusted_packages.references.base import AbstractPackageReference +from twyn.trusted_packages.references.base import AbstractPackageReference, NormalizedPackages from tests.conftest import patch_npm_packages_download, patch_pypi_packages_download @@ -22,14 +22,16 @@ class TestAbstractPackageReference: class DummyPackageReference(AbstractPackageReference): """Returns always the same packages, used for testing the interface.""" - def get_packages(self) -> set[str]: - return {"foo", "bar"} + def get_packages(self) -> NormalizedPackages: + return NormalizedPackages(packages={"foo", "bar"}) + + @staticmethod + def normalize_packages(packages: set[str]) -> NormalizedPackages: + return NormalizedPackages(packages=packages) def test_get_packages(self) -> None: - assert self.DummyPackageReference(source="foo", cache_handler=CacheHandler()).get_packages() == { - "foo", - "bar", - } + result = self.DummyPackageReference(source="foo", cache_handler=CacheHandler()).get_packages() + assert set(result) == {"foo", "bar"} @freeze_time("2025-8-19") def test_get_trusted_packages_uses_valid_cache(self, tmp_path: Path) -> None: @@ -50,7 +52,7 @@ def 
test_get_trusted_packages_uses_valid_cache(self, tmp_path: Path) -> None: result = TopPyPiReference("pypi", cache_handler=cache_handler).get_packages() assert m_pypi.call_count == 0 - assert result == {"flask", "fastapi", "requests", "django"} + assert set(result) == {"flask", "fastapi", "requests", "django"} def test_get_packages_no_cache(self) -> None: """Test that when use_cache is False, cache is not read or written, and packages are retrieved.""" @@ -105,7 +107,7 @@ def test_get_packages_downloads_when_cache_has_invalid_package_names(self, tmp_p # Should download from source due to invalid package names in cache assert mock_pypi.call_count == 1 - assert result == {"valid-package", "another-valid", "third-valid"} + assert set(result) == {"valid-package", "another-valid", "third-valid"} @freeze_time("2025-8-21", tz_offset=0) def test_cache_is_saved_when_not_existing(self, tmp_path: Path) -> None: @@ -119,7 +121,7 @@ def test_cache_is_saved_when_not_existing(self, tmp_path: Path) -> None: # The packages were downloaded and match the expected result assert m_pypi.call_count == 1 - assert retrieved_packages == set(cached_packages) + assert set(retrieved_packages) == set(cached_packages) # The packages were saved to the cache file, with its associated metadata cache_content = cache_handler.get_cache_entry("pypi") @@ -167,7 +169,7 @@ def test_get_trusted_packages(self, tmp_path: Path) -> None: ref = TopPyPiReference(cache_handler=CacheHandler(str(tmp_path / "cache"))) packages = ref.get_packages() - assert packages == {"foo", "bar", "django", "requests", "sqlalchemy"} + assert set(packages) == {"foo", "bar", "django", "requests", "sqlalchemy"} assert m_pypi.call_count == 1 @pytest.mark.parametrize( @@ -189,7 +191,7 @@ def test_normalize_package_when_loaded_from_cache( ref = TopPyPiReference(cache_handler=CacheHandler(str(tmp_path / "cache"))) packages = ref.get_packages() - assert packages == {"my-package"} + assert set(packages) == {"my-package"} assert 
m_pypi.call_count == 0 assert mock_get_packages_from_cache.call_count == 1 @@ -206,17 +208,17 @@ def test_normalize_package_when_loaded_from_cache( def test_normalize_package_when_downloaded( self, mock_get_packages_from_cache: Mock, package_name: Mock, tmp_path: Path ) -> None: - mock_get_packages_from_cache.return_value = {} + mock_get_packages_from_cache.return_value = set() with patch_pypi_packages_download([package_name]) as m_pypi: ref = TopPyPiReference() packages = ref.get_packages() - assert packages == {"my-package"} + assert set(packages) == {"my-package"} assert m_pypi.call_count == 1 assert mock_get_packages_from_cache.call_count == 1 - def test_normalize_package_invalid_name_raises(self): + def test_normalize_package_invalid_name_raises(self) -> None: ref = TopPyPiReference() with pytest.raises(PackageNormalizingError): ref.normalize_packages({"INVALID PACKAGE NAME!"}) @@ -224,16 +226,24 @@ def test_normalize_package_invalid_name_raises(self): class TestTopNpmReference: def test_get_trusted_packages(self, tmp_path: Path) -> None: - test_packages = ["foo", "bar", "react", "express", "lodash"] + """Test downloading packages and verify all packages are saved to cache in their original form.""" + test_packages = ["foo", "bar", "react", "express", "lodash", "@aws/sdk"] + cache_handler = CacheHandler(str(tmp_path / "cache")) with patch_npm_packages_download(test_packages) as m_npm: - ref = TopNpmReference(cache_handler=CacheHandler(str(tmp_path / "cache"))) + ref = TopNpmReference(cache_handler=cache_handler) packages = ref.get_packages() - assert packages == {"foo", "bar", "react", "express", "lodash"} + # Verify packages were downloaded + assert set(packages) == {"foo", "bar", "react", "express", "lodash", "@aws/sdk"} assert m_npm.call_count == 1 - def test_normalize_package_invalid_name_raises(self): + # Verify cache entry was created with all packages in their original form + cache_entry = cache_handler.get_cache_entry(ref.source) + assert cache_entry 
is not None + assert cache_entry.packages == {"foo", "bar", "react", "express", "lodash", "@aws/sdk"} + + def test_normalize_package_invalid_name_raises(self) -> None: ref = TopNpmReference() with pytest.raises(PackageNormalizingError): ref.normalize_packages({"INVALID PACKAGE NAME!"}) diff --git a/tests/trusted_packages/test_trusted_packages.py b/tests/trusted_packages/test_trusted_packages.py index 1b9db3d..649b2ae 100644 --- a/tests/trusted_packages/test_trusted_packages.py +++ b/tests/trusted_packages/test_trusted_packages.py @@ -5,14 +5,14 @@ EditDistance, SimilarityThreshold, ) +from twyn.trusted_packages.managers.trusted_pypi_packages_manager import ( + TrustedPackages, + TyposquatCheckResultEntry, +) from twyn.trusted_packages.selectors import ( FirstLetterExact, FirstLetterNearbyInKeyboard, ) -from twyn.trusted_packages.trusted_packages import ( - TrustedPackages, - TyposquatCheckResultEntry, -) class TestTrustedPackages: