Skip to content

Commit f4082a3

Browse files
committed
feat: treat namespaces as a special case when checking typos
1 parent bdac02f commit f4082a3

File tree

18 files changed

+313
-62
lines changed

18 files changed

+313
-62
lines changed
Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +0,0 @@
1-

src/twyn/dependency_managers/managers.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@
99
UV_LOCK,
1010
YARN_LOCK,
1111
)
12+
from twyn.trusted_packages.managers.base import TrustedPackagesProtocol
13+
from twyn.trusted_packages.managers.trusted_npm_packages_manager import TrustedNpmPackageManager
14+
from twyn.trusted_packages.managers.trusted_pypi_packages_manager import TrustedPackages
1215
from twyn.trusted_packages.references.base import AbstractPackageReference
1316
from twyn.trusted_packages.references.top_npm_reference import TopNpmReference
1417
from twyn.trusted_packages.references.top_pypi_reference import TopPyPiReference
@@ -30,6 +33,9 @@ class DependencyManager:
3033
dependency_files: set[str]
3134
"""Set of supported dependency file names."""
3235

36+
trusted_packages_manager: type[TrustedPackagesProtocol]
37+
"""TrustedPackages class that will determine if there's a typo or not."""
38+
3339
def matches_dependency_file(self, dependency_file: str) -> bool:
3440
"""Check if this manager can handle the given dependency file."""
3541
return Path(dependency_file).name in self.dependency_files
@@ -43,13 +49,16 @@ def get_alternative_source(self, sources: dict[str, str]) -> str | None:
4349
name="npm",
4450
trusted_packages_source=TopNpmReference,
4551
dependency_files={PACKAGE_LOCK_JSON, YARN_LOCK},
52+
trusted_packages_manager=TrustedNpmPackageManager,
4653
)
4754
pypi_dependency_manager = DependencyManager(
4855
name="pypi",
4956
trusted_packages_source=TopPyPiReference,
5057
dependency_files={UV_LOCK, POETRY_LOCK, REQUIREMENTS_TXT},
58+
trusted_packages_manager=TrustedPackages,
5159
)
5260

61+
5362
DEPENDENCY_MANAGERS: list[DependencyManager] = [pypi_dependency_manager, npm_dependency_manager]
5463
"""List of available dependency manager classes."""
5564

src/twyn/main.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,13 @@
2020
from twyn.similarity.algorithm import EditDistance, SimilarityThreshold
2121
from twyn.trusted_packages.cache_handler import CacheHandler
2222
from twyn.trusted_packages.exceptions import InvalidArgumentsError
23+
from twyn.trusted_packages.managers.base import TrustedPackagesProtocol
2324
from twyn.trusted_packages.models import (
2425
TyposquatCheckResultEntry,
2526
TyposquatCheckResultFromSource,
2627
TyposquatCheckResults,
2728
)
2829
from twyn.trusted_packages.references.base import AbstractPackageReference
29-
from twyn.trusted_packages.trusted_packages import TrustedPackages
3030

3131
logger = logging.getLogger("twyn")
3232
logger.addHandler(logging.NullHandler())
@@ -134,7 +134,7 @@ def _analyze_dependencies_from_input(
134134
dependency_manager = get_dependency_manager_from_name(package_ecosystem)
135135
source = dependency_manager.get_alternative_source({"pypi": pypi_source, "npm": npm_source})
136136
top_package_reference = dependency_manager.trusted_packages_source(source, maybe_cache_handler)
137-
trusted_packages = TrustedPackages(
137+
trusted_packages = dependency_manager.trusted_packages_manager(
138138
names=top_package_reference.get_packages(),
139139
algorithm=EditDistance(),
140140
selector=selector_method,
@@ -177,7 +177,7 @@ def _analyze_packages_from_source(
177177
top_package_reference = manager.trusted_packages_source(source, maybe_cache_handler)
178178

179179
packages_from_source = top_package_reference.get_packages()
180-
trusted_packages = TrustedPackages(
180+
trusted_packages = manager.trusted_packages_manager(
181181
names=packages_from_source,
182182
algorithm=EditDistance(),
183183
selector=selector_method,
@@ -200,7 +200,7 @@ def _analyze_packages_from_source(
200200

201201
def _analyze_dependencies(
202202
top_package_reference: AbstractPackageReference,
203-
trusted_packages: TrustedPackages,
203+
trusted_packages: TrustedPackagesProtocol,
204204
packages: set[str],
205205
allowlist: set[str],
206206
show_progress_bar: bool,
Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1+
from twyn.trusted_packages.managers.trusted_npm_packages_manager import TrustedNpmPackageManager
2+
from twyn.trusted_packages.managers.trusted_pypi_packages_manager import TrustedPackages
13
from twyn.trusted_packages.references.top_npm_reference import TopNpmReference
24
from twyn.trusted_packages.references.top_pypi_reference import TopPyPiReference
3-
from twyn.trusted_packages.trusted_packages import TrustedPackages
45

5-
__all__ = ["TopPyPiReference", "TrustedPackages", "TopNpmReference"]
6+
__all__ = ["TopPyPiReference", "TopNpmReference", "TrustedPackages", "TrustedNpmPackageManager"]

src/twyn/trusted_packages/cache_handler.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,9 @@
1616
class CacheEntry(BaseModel):
1717
saved_date: str
1818
"""ISO format date string when the cache entry was saved."""
19+
1920
packages: set[str]
20-
"""Set of trusted package names."""
21+
"""Set of all trusted package names including full namespaced packages like @scope/package."""
2122

2223
@field_validator("saved_date")
2324
@classmethod

src/twyn/trusted_packages/managers/__init__.py

Whitespace-only changes.
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
from collections import defaultdict
2+
from typing import Any, Protocol
3+
4+
from twyn.trusted_packages.models import TyposquatCheckResultEntry
5+
6+
OrderedPackages = defaultdict[str, set[str]]
7+
"""Type alias for mapping package names by ecosystem."""
8+
9+
10+
class TrustedPackagesProtocol(Protocol):
    """Structural interface shared by the per-ecosystem trusted-package managers.

    Any class exposing these two methods can be used wherever a trusted
    packages manager is expected; no inheritance is required.
    """

    def __contains__(self, obj: Any) -> bool:
        """Return whether ``obj`` is one of the trusted packages."""

    def get_typosquat(self, package_name: str) -> TyposquatCheckResultEntry:
        """Return the typosquat check result computed for ``package_name``."""
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
from collections import defaultdict
2+
from typing import Any
3+
4+
from twyn.similarity.algorithm import (
5+
AbstractSimilarityAlgorithm,
6+
SimilarityThreshold,
7+
)
8+
from twyn.trusted_packages.managers.base import OrderedPackages
9+
from twyn.trusted_packages.models import TyposquatCheckResultEntry
10+
from twyn.trusted_packages.selectors import AbstractSelector
11+
12+
13+
class TrustedNpmPackageManager:
    """Trusted npm names, split into plain packages and namespaced (``@scope/name``) ones.

    Plain package names are grouped by their first character in ``self.packages``.
    Namespaced names are grouped by namespace in ``self.namespaces``, each
    namespace mapping to the dependency names published under it.
    """

    def __init__(
        self,
        names: set[str],
        algorithm: AbstractSimilarityAlgorithm,
        selector: AbstractSelector,
        threshold_class: type[SimilarityThreshold],
    ) -> None:
        """Index ``names`` and store the collaborators used by ``get_typosquat``.

        Args:
            names: Trusted package names; may mix plain and ``@scope/name`` entries.
            algorithm: Provides ``get_distance`` between two names.
            selector: Provides ``select_similar_names`` to pick comparison candidates.
            threshold_class: Provides ``from_name`` to build the threshold for a name.
        """
        self.packages, self.namespaces = self._create_names_dictionary(names)

        self.threshold_class = threshold_class
        self.selector = selector
        self.algorithm = algorithm

    @staticmethod
    def _is_namespaced(name: str) -> bool:
        """Return whether ``name`` has the ``@namespace/dependency`` shape."""
        return name.startswith("@") and "/" in name

    def __contains__(self, obj: Any) -> bool:
        """Check if an object is a trusted package, namespaced package or namespace.

        Non-string objects and the empty string are never contained.
        """
        if not isinstance(obj, str) or not obj:
            return False

        if self._is_namespaced(obj):
            namespace, _, dependency = obj.partition("/")
            # `.get` avoids creating an empty bucket in the defaultdict on lookup.
            return dependency in self.namespaces.get(namespace, ())

        # A bare namespace name (e.g. "@scope") is still reported as contained,
        # as it was before namespaced packages were looked up individually.
        return obj in self.packages.get(obj[0], ()) or obj in self.namespaces

    def _create_names_dictionary(self, names: set[str]) -> tuple[OrderedPackages, OrderedPackages]:
        """Group plain names by first character and namespaced names by namespace.

        Empty names are ignored. A name starting with ``@`` but lacking a ``/``
        is treated as a plain name, since it cannot be split into namespace and
        dependency.

        Returns:
            A ``(packages, namespaces)`` tuple of ``defaultdict(set)`` mappings.
        """
        first_letter_names: OrderedPackages = defaultdict(set)
        namespaces: OrderedPackages = defaultdict(set)
        for name in names:
            if not name:
                continue
            if self._is_namespaced(name):
                # Split on the first "/" only, so extra slashes cannot break unpacking.
                namespace, _, dependency = name.partition("/")
                namespaces[namespace].add(dependency)
            else:
                first_letter_names[name[0]].add(name)
        return first_letter_names, namespaces

    def _get_typosquats_from_namespace_dependency(self, package_name: str) -> TyposquatCheckResultEntry:
        """Look for trusted namespaces similar to the namespace of ``package_name``.

        A trusted namespace is reported only when it is within the threshold
        distance of the given namespace AND it publishes a dependency with the
        same name as the one requested.
        """
        namespace, _, dependency = package_name.partition("/")
        threshold = self.threshold_class.from_name(namespace)
        typosquat_result = TyposquatCheckResultEntry(dependency=package_name)
        # Every namespace starts with "@", so they are all offered to the
        # selector under that single key.
        for trusted_namespace_name in self.selector.select_similar_names(
            names={"@": self.namespaces.keys()}, name=namespace
        ):
            distance = self.algorithm.get_distance(namespace, trusted_namespace_name)
            if threshold.is_inside_threshold(distance) and dependency in self.namespaces[trusted_namespace_name]:
                typosquat_result.add(f"{trusted_namespace_name}/{dependency}")
        return typosquat_result

    def _get_typosquats_from_dependency(self, package_name: str) -> TyposquatCheckResultEntry:
        """Look for trusted plain packages within the threshold distance of ``package_name``."""
        threshold = self.threshold_class.from_name(package_name)
        typosquat_result = TyposquatCheckResultEntry(dependency=package_name)
        for trusted_package_name in self.selector.select_similar_names(names=self.packages, name=package_name):
            distance = self.algorithm.get_distance(package_name, trusted_package_name)
            if threshold.is_inside_threshold(distance):
                typosquat_result.add(trusted_package_name)
        return typosquat_result

    def get_typosquat(self, package_name: str) -> TyposquatCheckResultEntry:
        """Check if a given package name is similar to any trusted package and returns it.

        Names shaped like ``@namespace/dependency`` are compared namespace by
        namespace; any other name is compared against the plain packages. The
        selector narrows the candidates, and the algorithm together with the
        threshold decide whether a candidate counts as similar.
        """
        if self._is_namespaced(package_name):
            return self._get_typosquats_from_namespace_dependency(package_name)
        return self._get_typosquats_from_dependency(package_name)

src/twyn/trusted_packages/trusted_packages.py renamed to src/twyn/trusted_packages/managers/trusted_pypi_packages_manager.py

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,10 @@
55
AbstractSimilarityAlgorithm,
66
SimilarityThreshold,
77
)
8+
from twyn.trusted_packages.managers.base import OrderedPackages
89
from twyn.trusted_packages.models import TyposquatCheckResultEntry
910
from twyn.trusted_packages.selectors import AbstractSelector
1011

11-
_PackageNames = defaultdict[str, set[str]]
12-
"""Type alias for mapping package names by ecosystem."""
13-
1412

1513
class TrustedPackages:
1614
"""Representation of packages that can be trusted."""
@@ -22,7 +20,7 @@ def __init__(
2220
selector: AbstractSelector,
2321
threshold_class: type[SimilarityThreshold],
2422
) -> None:
25-
self.names: _PackageNames = self._create_names_dictionary(names)
23+
self.names = self._create_names_dictionary(names)
2624
self.threshold_class = threshold_class
2725
self.selector = selector
2826
self.algorithm = algorithm
@@ -34,17 +32,14 @@ def __contains__(self, obj: Any) -> bool:
3432
return False
3533

3634
@staticmethod
37-
def _create_names_dictionary(names: set[str]) -> _PackageNames:
35+
def _create_names_dictionary(names: set[str]) -> OrderedPackages:
3836
"""Create a dictionary which will group all packages that start with the same letter under the same key."""
3937
first_letter_names = defaultdict(set)
4038
for name in names:
4139
first_letter_names[name[0]].add(name)
4240
return first_letter_names
4341

44-
def get_typosquat(
45-
self,
46-
package_name: str,
47-
) -> TyposquatCheckResultEntry:
42+
def get_typosquat(self, package_name: str) -> TyposquatCheckResultEntry:
4843
"""Check if a given package name is similar to any trusted package and returns it.
4944
5045
Only if there is a match on the first letter can a package name be

src/twyn/trusted_packages/references/base.py

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import logging
22
from abc import abstractmethod
3+
from collections.abc import Iterator
4+
from dataclasses import dataclass, field
35
from datetime import datetime
46
from typing import Any
57

@@ -14,6 +16,35 @@
1416
logger = logging.getLogger("twyn")
1517

1618

19+
@dataclass
20+
class NormalizedPackages:
21+
packages: set[str]
22+
namespaces: dict[str, set[str]] | None = None
23+
_raw_namespaces_cached: set[str] = field(default_factory=set)
24+
25+
def __post__init__(self) -> None:
26+
if self.namespaces:
27+
for namespace in self.namespaces:
28+
for package_name in self.namespaces[namespace]:
29+
self._raw_namespaces_cached.add(f"{namespace}/{package_name}")
30+
31+
def __iter__(self) -> Iterator[str]:
32+
yield from self.packages
33+
34+
if not self.namespaces:
35+
return
36+
37+
for namespace in self.namespaces:
38+
for package_name in self.namespaces[namespace]:
39+
yield f"{namespace}/{package_name}"
40+
41+
def __contains__(self, value: str) -> bool:
42+
if not isinstance(value, str):
43+
return False
44+
45+
return value in self.packages or value in self._raw_namespaces_cached
46+
47+
1748
class AbstractPackageReference:
1849
"""Represents a reference from where to retrieve trusted packages.
1950
@@ -32,7 +63,7 @@ def __init__(self, source: str | None = None, cache_handler: CacheHandler | None
3263

3364
@staticmethod
3465
@abstractmethod
35-
def normalize_packages(packages: set[str]) -> set[str]:
66+
def normalize_packages(packages: set[str]) -> NormalizedPackages:
3667
"""Normalize package names to make sure they're valid within the package manager context."""
3768

3869
def _download(self) -> dict[str, Any]:
@@ -64,7 +95,7 @@ def _get_packages_from_cache_if_enabled(self) -> set[str]:
6495

6596
return cache_entry.packages
6697

67-
def get_packages(self) -> set[str]:
98+
def get_packages(self) -> NormalizedPackages:
6899
"""Download and parse online source of top packages from the package ecosystem."""
69100
packages = self._get_packages_from_cache_if_enabled()
70101
# we don't save the cache here, we keep it as it is so the date remains the original one.
@@ -84,5 +115,4 @@ def get_packages(self) -> set[str]:
84115
# New packages were downloaded, we create a new entry updating all values.
85116
self._save_trusted_packages_to_cache_if_enabled(packages)
86117

87-
normalized_packages = self.normalize_packages(packages)
88-
return normalized_packages
118+
return self.normalize_packages(packages)

0 commit comments

Comments
 (0)