Skip to content

Commit c741322

Browse files
authored
feat: support multiple sources and create CacheHandler (#274)
1 parent 89b6239 commit c741322

File tree

15 files changed

+590
-212
lines changed

15 files changed

+590
-212
lines changed

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ dependencies = [
1919
"pyparsing<4.0.0,>=3.2.3",
2020
"tomlkit<0.14.0,>=0.11.6",
2121
"tomli<3.0.0,>=2.2.1; python_version < \"3.13\"",
22+
"pydantic>=2.11.7,<3.0.0",
2223
]
2324
name = "twyn"
2425
description = "Security tool against dependency typosquatting attacks"

src/twyn/base/exceptions.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,7 @@ def __init__(self, message: str = "") -> None:
1515
def show(self, file: Optional[IO[Any]] = None) -> None:
    """Log this error's formatted message through the module logger.

    The message is emitted twice: at DEBUG level with the active exception
    info attached, and at ERROR level without it. ``file`` is accepted but
    not used by this implementation.
    """
    # Traceback only appears for users who enabled debug logging.
    logger.debug(self.format_message(), exc_info=True)
    logger.error(self.format_message(), exc_info=False)
18+
19+
20+
class PackageNormalizingError(TwynError):
    """Raised when a package name cannot be normalized."""

src/twyn/base/utils.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,15 @@
11
import re
22

3+
from twyn.base.exceptions import PackageNormalizingError
34

4-
def _normalize_packages(packages: set[str]) -> set[str]:
5+
6+
def normalize_packages(packages: set[str]) -> set[str]:
    """Return the given dependency names in PyPI-normalized form.

    Runs of ``-``, ``_`` and ``.`` are collapsed into a single ``-`` and the
    result is lowercased, following
    https://packaging.python.org/en/latest/specifications/name-normalization/.

    Raises:
        PackageNormalizingError: if any normalized name does not match the
            expected package-name pattern.
    """
    separator_runs = re.compile(r"[-_.]+")
    valid_name = re.compile(r"^([a-z0-9]|[a-z0-9][a-z0-9._-]*[a-z0-9])\Z")

    normalized = {separator_runs.sub("-", name).lower() for name in packages}

    # Validation runs on the normalized form, so the reported name is the
    # normalized one rather than the caller's original spelling.
    for package in normalized:
        if valid_name.match(package) is None:
            raise PackageNormalizingError(f"Package name '{package}' does not match required pattern")

    return normalized

src/twyn/cli.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@
1515
from twyn.config.config_handler import ConfigHandler
1616
from twyn.file_handler.file_handler import FileHandler
1717
from twyn.main import check_dependencies
18-
from twyn.trusted_packages.constants import TRUSTED_PACKAGES_FILE_PATH
18+
from twyn.trusted_packages.cache_handler import CacheHandler
19+
from twyn.trusted_packages.constants import CACHE_DIR
1920

2021
logging.basicConfig(
2122
format="%(message)s",
@@ -154,13 +155,11 @@ def cache() -> None:
154155

155156
@cache.command()
def clear() -> None:
    """Clear cached trusted packages data."""
    # Every per-source cache file under CACHE_DIR is removed in one go.
    CacheHandler(CACHE_DIR).clear_all()

    success_message = click.style("All cache cleared", fg="green")
    click.echo(success_message)
164163

165164

166165
if __name__ == "__main__":

src/twyn/file_handler/file_handler.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
from pathlib import Path
44
from typing import Protocol
55

6-
from twyn.base.exceptions import TwynError
76
from twyn.file_handler.exceptions import PathIsNotFileError, PathNotFoundError
87

98
logger = logging.getLogger("twyn")
@@ -13,6 +12,7 @@ class BaseFileHandler(Protocol):
1312
def read(self) -> str: ...
1413
def exists(self) -> bool: ...
1514
def write(self, data: str) -> None: ...
15+
# Parameter name and default mirror FileHandler.delete, which is called with
# the keyword ``delete_parent_dir``; the stub previously declared a different name.
def delete(self, delete_parent_dir: bool = False) -> None: ...
1616

1717

1818
class FileHandler(BaseFileHandler):
@@ -36,7 +36,7 @@ def read(self) -> str:
3636
def exists(self) -> bool:
    """Report whether the handled path passes ``_raise_for_file_exists``.

    Returns ``False`` when that check raises ``PathNotFoundError`` or
    ``PathIsNotFileError``; any other exception propagates to the caller.
    """
    try:
        self._raise_for_file_exists()
    except (PathNotFoundError, PathIsNotFileError):
        return False
    else:
        return True
4242

@@ -49,3 +49,20 @@ def _raise_for_file_exists(self) -> None:
4949

5050
def write(self, data: str) -> None:
    """Write ``data`` as text to ``self.file_path``."""
    target = self.file_path
    target.write_text(data)
52+
53+
def delete(self, delete_parent_dir: bool = False) -> None:
    """Delete the handled file and, on request, its parent directory.

    Nothing happens (beyond an info log) when ``exists()`` reports the file
    as absent. When ``delete_parent_dir`` is true the parent directory is
    removed with ``rmdir``; an ``OSError`` from that call is logged and
    swallowed rather than raised.
    """
    if not self.exists():
        logger.info("File does not exist, nothing to delete")
        return

    self.file_path.unlink()
    logger.info("Deleted file: %s", self.file_path)

    if not delete_parent_dir:
        return

    parent_dir = self.file_path.parent
    try:
        parent_dir.rmdir()
    except OSError:
        # Best effort: a leftover directory is not treated as a failure.
        logger.exception("Directory not empty or not enough permissions. Cannot be removed: %s", parent_dir)
    else:
        logger.info("Removed empty directory: %s", parent_dir)

src/twyn/main.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,13 @@
99
AvailableLoggingLevels,
1010
SelectorMethod,
1111
)
12-
from twyn.base.utils import _normalize_packages
12+
from twyn.base.utils import normalize_packages
1313
from twyn.config.config_handler import ConfigHandler
1414
from twyn.dependency_parser.dependency_selector import DependencySelector
1515
from twyn.file_handler.file_handler import FileHandler
1616
from twyn.similarity.algorithm import EditDistance, SimilarityThreshold
1717
from twyn.trusted_packages import TopPyPiReference
18+
from twyn.trusted_packages.cache_handler import CacheHandler
1819
from twyn.trusted_packages.selectors import AbstractSelector
1920
from twyn.trusted_packages.trusted_packages import (
2021
TrustedPackages,
@@ -39,15 +40,16 @@ def check_dependencies(
3940
)
4041
_set_logging_level(config.logging_level)
4142

43+
cache_handler = CacheHandler()
4244
trusted_packages = TrustedPackages(
43-
names=TopPyPiReference(source=config.pypi_reference).get_packages(use_cache),
45+
names=TopPyPiReference(source=config.pypi_reference, cache_handler=cache_handler).get_packages(use_cache),
4446
algorithm=EditDistance(),
4547
selector=get_candidate_selector(config.selector_method),
4648
threshold_class=SimilarityThreshold,
4749
)
48-
normalized_allowlist_packages = _normalize_packages(config.allowlist)
50+
normalized_allowlist_packages = normalize_packages(config.allowlist)
4951
dependencies = dependencies if dependencies else get_parsed_dependencies_from_file(config.dependency_file)
50-
normalized_dependencies = _normalize_packages(dependencies)
52+
normalized_dependencies = normalize_packages(dependencies)
5153

5254
errors: list[TyposquatCheckResult] = []
5355
for dependency in track(normalized_dependencies, description="Processing..."):
src/twyn/trusted_packages/cache_handler.py

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
import json
2+
import logging
3+
import os
4+
from datetime import datetime
5+
from hashlib import md5
6+
from pathlib import Path
7+
from typing import Optional
8+
9+
from pydantic import BaseModel, ValidationError, field_validator
10+
11+
from twyn.base.exceptions import PackageNormalizingError
12+
from twyn.base.utils import normalize_packages
13+
from twyn.file_handler.exceptions import PathIsNotFileError, PathNotFoundError
14+
from twyn.file_handler.file_handler import FileHandler
15+
from twyn.trusted_packages.constants import CACHE_DIR, TRUSTED_PACKAGES_MAX_RETENTION_DAYS
16+
17+
logger = logging.getLogger("twyn")
18+
19+
20+
# One cached snapshot of trusted packages: the date it was saved plus the
# package names, both validated when the model is built.
class CacheEntry(BaseModel):
    saved_date: str  # must be parseable by datetime.fromisoformat
    packages: set[str]  # stored in normalized form after validation

    @field_validator("saved_date")
    @classmethod
    def validate_saved_date(cls, v: str) -> str:
        """Accept ``v`` unchanged if it parses as an ISO-format date/datetime."""
        try:
            datetime.fromisoformat(v)
        except (ValueError, TypeError) as err:
            raise ValueError(f"Invalid saved_date format: {err}") from err
        return v

    @field_validator("packages")
    @classmethod
    def validate_packages(cls, v: set[str]) -> set[str]:
        """Return the normalized package names, rejecting names that cannot be normalized."""
        try:
            normalized = normalize_packages(v)
        except PackageNormalizingError as err:
            raise ValueError(f"Failed to normalize packages: {err}") from err
        return normalized
41+
42+
43+
class CacheHandler:
    """Cache class that provides basic read/write/delete operation for individual source cache files.

    Every source is stored in its own JSON file inside ``cache_dir``; the file
    name is the MD5 hex digest of the source string.
    """

    def __init__(self, cache_dir: str = CACHE_DIR) -> None:
        self.cache_dir = cache_dir

    def write_entry(self, source: str, data: CacheEntry) -> None:
        """Save cache entry to source-specific cache file."""
        file_handler = self._get_file_handler(source)
        # Ensure parent directory exists
        file_handler.file_path.parent.mkdir(parents=True, exist_ok=True)
        file_handler.write(data.model_dump_json())
        logger.debug("Successfully wrote cache data to %s", file_handler.file_path)

    def get_cache_entry(self, source: str) -> Optional[CacheEntry]:
        """Retrieve cache entry from source-specific cache file.

        Returns ``None`` when the file is missing, is not valid JSON, is empty,
        is corrupt, or holds an outdated entry. Corrupt files (valid JSON that
        does not describe a ``CacheEntry``) are deleted.
        """
        file_handler = self._get_file_handler(source)
        try:
            content = file_handler.read()
        except (PathNotFoundError, PathIsNotFileError):
            logger.debug("Cache file not found: %s", file_handler.file_path)
            return None

        try:
            json_content = json.loads(content)
        except json.JSONDecodeError as e:
            logger.warning("Failed to decode JSON from cache file %s: %s", file_handler.file_path, e)
            return None

        if not json_content:
            return None

        # A truthy non-object payload (list, string, number) cannot be unpacked
        # with ``**``; treat it as a corrupt cache instead of raising TypeError.
        if not isinstance(json_content, dict):
            self._discard_corrupt_entry(source)
            return None

        try:
            entry = CacheEntry(**json_content)
        except ValidationError:
            self._discard_corrupt_entry(source)
            return None

        return None if self.is_entry_outdated(entry) else entry

    def is_entry_outdated(self, entry: CacheEntry) -> bool:
        """Check if a cache entry is outdated based on retention days.

        An entry whose date cannot be parsed is reported as outdated.
        """
        try:
            saved_date = datetime.fromisoformat(entry.saved_date).date()
            days_diff = (datetime.today().date() - saved_date).days
        except (ValueError, AttributeError):
            logger.warning("Invalid date format in cache entry")
            return True
        else:
            return days_diff > TRUSTED_PACKAGES_MAX_RETENTION_DAYS

    def clear_all(self) -> None:
        """Delete all ``.json`` cache files under the cache directory, then try to remove it."""
        for root, _dirs, files in os.walk(self.cache_dir):
            for file in files:
                if file.endswith(".json"):
                    FileHandler(os.path.join(root, file)).delete()

        # rmdir only succeeds on an empty directory; a failure is logged, not raised.
        cache_path = Path(self.cache_dir)
        if cache_path.exists() and cache_path.is_dir():
            try:
                cache_path.rmdir()
            except OSError:
                logger.exception("Could not delete cache directory.")

    def get_cache_file_path(self, source: str) -> str:
        """Generate cache file path for a specific source."""
        # MD5 only derives a filesystem-safe name here. Declaring that keeps the
        # call working on FIPS-restricted builds; the digest itself is unchanged.
        safe_filename = md5(source.encode(), usedforsecurity=False).hexdigest()
        return str(Path(self.cache_dir) / f"{safe_filename}.json")

    def _get_file_handler(self, source: str) -> FileHandler:
        """Get file handler for a specific source cache file."""
        cache_file_path = self.get_cache_file_path(source)
        return FileHandler(cache_file_path)

    def _clear_entry(self, source: str) -> None:
        """Delete cache file for a specific source."""
        file_handler = self._get_file_handler(source)
        file_handler.delete(delete_parent_dir=False)

    def _discard_corrupt_entry(self, source: str) -> None:
        """Warn that the cache for ``source`` is corrupt and delete its file."""
        logger.warning("Could not read cache for source %s. Cache is corrupt.", source)
        self._clear_entry(source)

src/twyn/trusted_packages/constants.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# Cache configuration constants
2-
TRUSTED_PACKAGES_FILE_PATH = ".twyn/trusted_packages.json"
2+
CACHE_DIR = ".twyn"
33
TRUSTED_PACKAGES_MAX_RETENTION_DAYS = 30
44

55

0 commit comments

Comments
 (0)