Skip to content

Commit 583c262

Browse files
authored
feat: save downloaded packages to cache (#270)
1 parent fe16fb4 commit 583c262

File tree

17 files changed

+407
-93
lines changed

17 files changed

+407
-93
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
*.pyc
55
__pycache__
66
db.sqlite3
7-
7+
.twyn/
88
# Backup files #
99
*.bak
1010

pyproject.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,16 +49,18 @@ dev = [
4949
"pytest<9.0.0,>=7.1.3",
5050
"mypy<1.17,>=0.982",
5151
"pytest-cov<7,>=4",
52-
5352
"ruff<0.12.4,>=0.5.1",
5453
"types-requests<3.0.0.0,>=2.32.4.20250611",
54+
"types-python-dateutil>=2.9.0.20250809",
55+
"freezegun>=1.5.5",
5556
]
5657
local = [
5758
"ipdb<1.0.0,>=0.13.9",
5859
"commitizen<5.0,>=2.38",
5960
"pdbpp<1.0.0,>=0.11.6",
6061
]
6162

63+
6264
[build-system]
6365
requires = ["hatchling"]
6466
build-backend = "hatchling.build"

src/twyn/base/utils.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
import re
2+
3+
4+
def _normalize_packages(packages: set[str]) -> set[str]:
5+
"""Normalize dependency names according to PyPi https://packaging.python.org/en/latest/specifications/name-normalization/."""
6+
return {re.sub(r"[-_.]+", "-", name).lower() for name in packages}

src/twyn/cli.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,13 +58,20 @@ def entry_point() -> None:
5858
default=False,
5959
is_flag=True,
6060
)
61+
@click.option(
62+
"--no-cache",
63+
is_flag=True,
64+
default=False,
65+
help="Disable use of the trusted packages cache. Always fetch from the source.",
66+
)
6167
def run(
6268
config: str,
6369
dependency_file: Optional[str],
6470
dependency: tuple[str],
6571
selector_method: str,
6672
v: bool,
6773
vv: bool,
74+
no_cache: bool,
6875
) -> int:
6976
if v and vv:
7077
raise click.UsageError(
@@ -92,6 +99,7 @@ def run(
9299
dependency_file=dependency_file,
93100
selector_method=selector_method,
94101
verbosity=verbosity,
102+
use_cache=not no_cache,
95103
)
96104

97105
if errors:

src/twyn/dependency_parser/abstract_parser.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ def _read(self) -> str:
2626
return self.file_handler.read()
2727

2828
def file_exists(self) -> bool:
29-
return self.file_handler.file_exists()
29+
return self.file_handler.exists()
3030

3131
@abstractmethod
3232
def parse(self) -> set[str]:

src/twyn/file_handler/file_handler.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
class BaseFileHandler(Protocol):
1313
def read(self) -> str: ...
14-
def file_exists(self) -> bool: ...
14+
def exists(self) -> bool: ...
1515
def write(self, data: str) -> None: ...
1616

1717

@@ -33,7 +33,7 @@ def read(self) -> str:
3333

3434
return content
3535

36-
def file_exists(self) -> bool:
36+
def exists(self) -> bool:
3737
try:
3838
self._raise_for_file_exists()
3939
except TwynError:
@@ -48,5 +48,4 @@ def _raise_for_file_exists(self) -> None:
4848
raise PathIsNotFileError
4949

5050
def write(self, data: str) -> None:
51-
self._raise_for_file_exists()
5251
self.file_path.write_text(data)

src/twyn/main.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import logging
2-
import re
32
from typing import Optional
43

54
from rich.logging import RichHandler
@@ -11,6 +10,7 @@
1110
AvailableLoggingLevels,
1211
SelectorMethod,
1312
)
13+
from twyn.base.utils import _normalize_packages
1414
from twyn.config.config_handler import ConfigHandler
1515
from twyn.dependency_parser.dependency_selector import DependencySelector
1616
from twyn.file_handler.file_handler import FileHandler
@@ -36,6 +36,7 @@ def check_dependencies(
3636
dependency_file: Optional[str] = None,
3737
dependencies: Optional[set[str]] = None,
3838
verbosity: AvailableLoggingLevels = AvailableLoggingLevels.none,
39+
use_cache: bool = True,
3940
) -> list[TyposquatCheckResult]:
4041
"""Check if dependencies could be typosquats."""
4142
config_file_handler = FileHandler(config_file or DEFAULT_PROJECT_TOML_FILE)
@@ -45,7 +46,7 @@ def check_dependencies(
4546
_set_logging_level(config.logging_level)
4647

4748
trusted_packages = TrustedPackages(
48-
names=TopPyPiReference(source=config.pypi_reference).get_packages(),
49+
names=TopPyPiReference(source=config.pypi_reference).get_packages(use_cache),
4950
algorithm=EditDistance(),
5051
selector=get_candidate_selector(config.selector_method),
5152
threshold_class=SimilarityThreshold,
@@ -84,8 +85,3 @@ def get_parsed_dependencies_from_file(dependency_file: Optional[str] = None) ->
8485
dependencies = dependency_parser.parse()
8586
logger.debug("Successfully parsed local dependencies file.")
8687
return dependencies
87-
88-
89-
def _normalize_packages(packages: set[str]) -> set[str]:
90-
"""Normalize dependency names according to PyPi https://packaging.python.org/en/latest/specifications/name-normalization/."""
91-
return {re.sub(r"[-_.]+", "-", name).lower() for name in packages}

src/twyn/trusted_packages/constants.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
# Cache configuration constants
2+
TRUSTED_PACKAGES_FILE_PATH = ".twyn/trusted_packages.json"
3+
TRUSTED_PACKAGES_MAX_RETENTION_DAYS = 30
4+
5+
16
ADJACENCY_MATRIX = {
27
"1": ["2", "q", "w"],
38
"2": ["1", "3", "q", "w"],

src/twyn/trusted_packages/exceptions.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,8 @@ class EmptyPackagesListError(TwynError):
1313
message = "Downloaded packages list is empty"
1414

1515

16-
class CharacterNotInMatrixError(TwynError, KeyError): ...
16+
# Subclasses both TwynError and KeyError, so an `except` clause for either one catches it.
# NOTE(review): presumably raised when a character has no entry in ADJACENCY_MATRIX — confirm at the raise site.
class CharacterNotInMatrixError(TwynError, KeyError): ...  # TODO add comments
17+
18+
19+
class InvalidCacheError(TwynError):
    """Raised when the trusted packages cache cannot be decoded or has an unexpected format."""

src/twyn/trusted_packages/references.py

Lines changed: 88 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,17 @@
1+
import json
12
import logging
23
from abc import ABC, abstractmethod
4+
from datetime import date, datetime
35
from typing import Any
46

57
import requests
68

9+
from twyn.base.utils import _normalize_packages
10+
from twyn.file_handler.file_handler import FileHandler
11+
from twyn.trusted_packages.constants import TRUSTED_PACKAGES_FILE_PATH, TRUSTED_PACKAGES_MAX_RETENTION_DAYS
712
from twyn.trusted_packages.exceptions import (
813
EmptyPackagesListError,
14+
InvalidCacheError,
915
InvalidJSONError,
1016
InvalidPyPiFormatError,
1117
)
@@ -20,17 +26,95 @@ def __init__(self, source: str) -> None:
2026
self.source = source
2127

2228
@abstractmethod
23-
def get_packages(self) -> set[str]:
29+
def get_packages(self, use_cache: bool = True) -> set[str]:
2430
"""Return the names of the trusted packages available in the reference."""
2531

2632

2733
class TopPyPiReference(AbstractPackageReference):
2834
"""Top PyPi packages retrieved from an online source."""
2935

30-
def get_packages(self) -> set[str]:
36+
def get_packages(self, use_cache: bool = True) -> set[str]:
    """Return the normalized names of the top PyPI packages.

    With ``use_cache`` enabled, a usable cache file at ``TRUSTED_PACKAGES_FILE_PATH`` is
    preferred over the network. When caching is disabled, or the cache is missing,
    outdated, invalid or empty, the list is downloaded from ``self.source`` and, if
    caching is enabled, written back to the cache file. A failure to write the cache
    is logged and otherwise ignored, because the downloaded list is still usable.

    Args:
        use_cache: Whether the local cache file may be read and written.

    Returns:
        The package names after PyPI name normalization.
    """
    trusted_packages_file = FileHandler(TRUSTED_PACKAGES_FILE_PATH) if use_cache else None

    packages_to_use: set[str] = set()
    if trusted_packages_file is not None:
        # A cache hit is not re-saved, so the stored date keeps pointing at the original download.
        packages_to_use = self._get_packages_from_cache(trusted_packages_file)

    if not packages_to_use:
        # Caching disabled, cache miss (missing, outdated or invalid file) or empty cache.
        logger.info("Fetching trusted packages from PyPI reference...")
        packages_to_use = self._parse(self._download())
        if trusted_packages_file is not None:
            try:
                self._save_trusted_packages_to_file(packages_to_use, trusted_packages_file, self.source)
            except OSError as e:
                # The cache is only an optimisation: an unwritable location (read-only
                # directory, full disk, ...) must not discard a successful download.
                logger.warning("Could not save trusted packages cache: %s", e)

    return _normalize_packages(packages_to_use)
53+
54+
def _is_content_outdated(self, content_date: date) -> bool:
    """Tell whether content saved on ``content_date`` has exceeded the retention period.

    Content counts as outdated once more than ``TRUSTED_PACKAGES_MAX_RETENTION_DAYS``
    whole days separate ``content_date`` from today's local date.
    """
    today = datetime.today().date()
    age = today - content_date
    return age.days > TRUSTED_PACKAGES_MAX_RETENTION_DAYS
58+
59+
def _save_trusted_packages_to_file(self, packages: set[str], file_handler: FileHandler, source: str) -> None:
    """Write the trusted packages to the cache file as a JSON document.

    The document records the ``source`` the packages came from and, under ``data``,
    the package names, their count and today's local date in ISO format. The parent
    directory of the cache file is created when it does not exist yet.
    """
    cache_path = file_handler.file_path
    payload = {
        "packages": list(packages),
        "count": len(packages),
        "saved_date": datetime.now().date().isoformat(),
    }
    document = {"source": source, "data": payload}

    cache_path.parent.mkdir(parents=True, exist_ok=True)
    file_handler.write(json.dumps(document))
    logger.debug("Saved %d trusted packages to %s", len(packages), cache_path)
72+
73+
def _load_trusted_packages_from_file(self, file_handler: FileHandler) -> tuple[set[str], bool]:
    """Load the cached trusted packages and report whether they are outdated.

    The cache is expected to be a JSON object shaped like
    ``{"data": {"packages": [<str>, ...], "saved_date": "<ISO date>"}}``. Any deviation
    (undecodable JSON, wrong container types, missing keys, a malformed date or
    package entries that are not strings) is logged as a warning and reported as an
    empty, outdated cache so that the caller falls back to a fresh download.

    Args:
        file_handler: Handler pointing at the cache file.

    Returns:
        A ``(packages, is_outdated)`` tuple; ``(set(), True)`` when the cache is invalid.
    """
    try:
        try:
            raw_content = json.loads(file_handler.read())
        except json.JSONDecodeError as e:
            raise InvalidCacheError("Could not decode cache.") from e

        try:
            data = raw_content["data"]
            saved_date_str = data["saved_date"]
            raw_packages = data["packages"]
        except (KeyError, TypeError) as e:
            # TypeError covers valid JSON whose top level or "data" entry is not an object
            # (a list, string, number or null cannot be indexed by key).
            raise InvalidCacheError("Invalid cache format.") from e

        try:
            saved_date = datetime.fromisoformat(saved_date_str).date()
        except (ValueError, TypeError) as e:
            # TypeError covers a stored date that is not a string at all.
            raise InvalidCacheError("Cache saved date is invalid.") from e

        if not isinstance(raw_packages, list) or not all(isinstance(name, str) for name in raw_packages):
            # Without this check a bare string would silently turn into a set of its characters.
            raise InvalidCacheError("Invalid format in cached packages")
        packages = set(raw_packages)

        is_outdated = self._is_content_outdated(saved_date)
    except InvalidCacheError as e:
        logger.warning("Error reading cached trusted packages: %s", e)
        return set(), True

    if is_outdated:
        logger.info("Cached trusted packages are outdated (saved: %s)", saved_date)
    else:
        logger.debug("Using cached trusted packages from %s", saved_date)

    return packages, is_outdated
109+
110+
def _get_packages_from_cache(self, trusted_packages_file: FileHandler) -> set[str]:
    """Return the cached packages, or an empty set when no fresh cache is available.

    An empty set is returned when the cache file does not exist or when its content is
    reported as outdated.
    """
    if not trusted_packages_file.exists():
        return set()

    cached_packages, is_outdated = self._load_trusted_packages_from_file(trusted_packages_file)
    return set() if is_outdated else cached_packages
34118

35119
def _download(self) -> dict[str, Any]:
36120
packages = requests.get(self.source)

0 commit comments

Comments
 (0)