diff --git a/pisek/jobs/cache.py b/pisek/jobs/cache.py index 3ab2b35a..35067196 100644 --- a/pisek/jobs/cache.py +++ b/pisek/jobs/cache.py @@ -4,7 +4,7 @@ # Copyright (c) 2019 - 2022 Jiří Beneš # Copyright (c) 2020 - 2022 Michal Töpfer # Copyright (c) 2022 Jiří Kalvoda -# Copyright (c) 2023 Daniel Skýpala +# Copyright (c) 2023 Daniel Skýpala # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -24,7 +24,7 @@ from pisek.version import __version__ from pisek.utils.text import eprint from pisek.utils.colors import color_settings -from pisek.utils.paths import INTERNALS_DIR +from pisek.utils.paths import INTERNALS_DIR, TaskPath from pisek.jobs.logging import LogEntry CACHE_VERSION_FILE = os.path.join(INTERNALS_DIR, "cache_version") @@ -34,6 +34,21 @@ CACHE_SAVE_INTERVAL = 1 # seconds +@dataclass(frozen=True, order=True) +class GlobsToFilesArgs: + globs: tuple[str, ...] + directory: TaskPath + exclude: tuple[str, ...] 
+ + def __init__( + self, globs: Iterable[str], directory: TaskPath, exclude: Iterable[str] + ) -> None: + # Sigh, frozen dataclasses + object.__setattr__(self, "globs", tuple(sorted(globs))) + object.__setattr__(self, "directory", directory) + object.__setattr__(self, "exclude", tuple(sorted(exclude))) + + @dataclass class CacheEntry: """Object representing single cached job.""" @@ -44,7 +59,7 @@ class CacheEntry: prerequisites_results: list[str] envs: list[tuple[str, ...]] files: list[str] - globs: list[str] + globs: list[GlobsToFilesArgs] output: list[tuple[str, bool]] logs: list[LogEntry] @@ -55,7 +70,7 @@ def __init__( cached_attributes: dict[str, Any], envs: Iterable[tuple[str, ...]], files: Iterable[str], - globs: Iterable[str], + globs: Iterable[GlobsToFilesArgs], prerequisites_results: Iterable[str], output: list[tuple[str, bool]], logs: list[LogEntry], @@ -63,10 +78,10 @@ def __init__( self.name = name self.signature = signature self.cached_attributes = dict(sorted(cached_attributes.items())) - self.prerequisites_results = list(sorted(prerequisites_results)) - self.envs = list(sorted(envs)) - self.files = list(sorted(files)) - self.globs = list(sorted(globs)) + self.prerequisites_results = sorted(prerequisites_results) + self.envs = sorted(envs) + self.files = sorted(files) + self.globs = sorted(globs) self.output = output self.logs = logs diff --git a/pisek/jobs/jobs.py b/pisek/jobs/jobs.py index 99209c9b..54b9d884 100644 --- a/pisek/jobs/jobs.py +++ b/pisek/jobs/jobs.py @@ -4,7 +4,7 @@ # Copyright (c) 2019 - 2022 Jiří Beneš # Copyright (c) 2020 - 2022 Michal Töpfer # Copyright (c) 2022 Jiří Kalvoda -# Copyright (c) 2023 Daniel Skýpala +# Copyright (c) 2023 Daniel Skýpala # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -28,6 +28,7 @@ AbstractSet, Any, Callable, + Iterable, MutableSet, NamedTuple, Optional, @@ -35,8 +36,9 @@ ) from pisek.jobs.logging import 
log, LogLevel, LogEntry -from pisek.jobs.cache import Cache, CacheEntry +from pisek.jobs.cache import Cache, CacheEntry, GlobsToFilesArgs from pisek.utils.paths import TaskPath +from pisek.utils.util import globs_to_files if TYPE_CHECKING: from pisek.env.env import Env @@ -197,7 +199,7 @@ def __init__(self, env: "Env", name: str) -> None: self._cached_attributes: list[str] = ["result"] self._env = env self._accessed_envs: MutableSet[tuple[str, ...]] = set() - self._accessed_globs: MutableSet[str] = set() + self._accessed_globs: MutableSet[GlobsToFilesArgs] = set() self._accessed_files: MutableSet[str] = set() self._logs: list[LogEntry] = [] self.name = name @@ -225,24 +227,24 @@ def _signature( self, envs: AbstractSet[tuple[str, ...]], paths: AbstractSet[str], - globs: AbstractSet[str], + globs: AbstractSet[GlobsToFilesArgs], results: dict[str, Any], cache: Cache, - ) -> tuple[Optional[str], Optional[str]]: + ) -> tuple[str | None, str | None]: """Compute a signature (i.e. hash) of given envs, files and prerequisites results.""" sign = hashlib.sha256() - sign.update(f"{self.__class__.__name__}\n".encode()) + sign.update(f"{self.__class__.__name__}\00".encode()) for i, arg in enumerate(self._args): - sign.update(f"{i}={arg}\n".encode()) + sign.update(f"{i}={arg}\00".encode()) for key, val in self._kwargs.items(): - sign.update(f"{key}={val}\n".encode()) + sign.update(f"{key}={val}\00".encode()) for env_key in sorted(envs): try: value = self._env.get_compound(env_key) except (AttributeError, TypeError, ValueError, KeyError): return (None, f"Key nonexistent: {env_key}") - sign.update(f"{env_key}={value}\n".encode()) + sign.update(f"{env_key}={value}\00".encode()) for path in sorted(paths): while os.path.islink(path): @@ -251,17 +253,15 @@ def _signature( ) if os.path.isfile(path): - sign.update(f"{path}={cache.file_hash(path)}\n".encode()) + sign.update(f"{path}={cache.file_hash(path)}\00".encode()) elif os.path.isdir(path): - sign.update(f"{path} is 
directory\n".encode()) + sign.update(f"{path} is directory\00".encode()) else: return (None, f"File nonexistent: {path}") - for g in sorted(globs): - glob_sign = f"{g} -> " + " ".join( - glob.glob(g, recursive=True, include_hidden=True) - ) - sign.update(glob_sign.encode()) + for args in sorted(globs): + files = globs_to_files(args.globs, args.directory, args.exclude) + sign.update(f"{args.globs}\n{args.exclude}\n{files}\00".encode()) for name, result in sorted(results.items()): # Trying to prevent hashing object.__str__ which is non-deterministic diff --git a/pisek/task_jobs/builder/build.py b/pisek/task_jobs/builder/build.py index 641bc696..8d15d096 100644 --- a/pisek/task_jobs/builder/build.py +++ b/pisek/task_jobs/builder/build.py @@ -1,6 +1,6 @@ # pisek - Tool for developing tasks for programming competitions. # -# Copyright (c) 2023 Daniel Skýpala +# Copyright (c) 2023 Daniel Skýpala # Copyright (c) 2024 Benjamin Swart # This program is free software: you can redistribute it and/or modify @@ -165,7 +165,12 @@ def _run(self) -> None: dst = os.path.join(workdir, path.name) if self._is_dir(path): shutil.copytree(path.path, dst) - self._access_dir(path) + for exclude_path in strategy_cls.exclude_paths: + fullpath = os.path.join(dst, exclude_path) + if os.path.exists(fullpath): + shutil.rmtree(fullpath) + + self._access_dir(path, exclude_paths=strategy_cls.exclude_paths) subdir = dst elif self._is_file(path): shutil.copy(path.path, dst) diff --git a/pisek/task_jobs/builder/strategies.py b/pisek/task_jobs/builder/strategies.py index 368a7613..93ecf18b 100644 --- a/pisek/task_jobs/builder/strategies.py +++ b/pisek/task_jobs/builder/strategies.py @@ -1,6 +1,6 @@ # pisek - Tool for developing tasks for programming competitions. 
# -# Copyright (c) 2023 Daniel Skýpala +# Copyright (c) 2023 Daniel Skýpala # Copyright (c) 2024 Benjamin Swart # Copyright (c) 2025 Antonín Maloň @@ -61,6 +61,7 @@ class BuildStrategy(ABC): name: BuildStrategyName extra_sources: Optional[str] = None extra_nonsources: Optional[str] = None + exclude_paths: list[str] = [] def __init__( self, @@ -467,6 +468,7 @@ class Cargo(BuildStrategy): name = BuildStrategyName.cargo _target_subdir: str = "target" _artifact_dir: str = ".pisek-executables" + exclude_paths: list[str] = [_target_subdir] @classmethod def applicable_on_files(cls, build: "BuildSection", sources: list[str]) -> bool: @@ -479,10 +481,7 @@ def applicable_on_directory(cls, build: "BuildSection", directory: str) -> bool: def _build(self) -> str: directory = self._listdir()[0] with FakeChangedCWD(self, directory): - if self._exists(self._target_subdir): - raise PipelineItemFailure( - f"Cargo strategy: '{self._target_subdir}' already exists" - ) + assert not self._exists(self._target_subdir) args = [ "--release", diff --git a/pisek/task_jobs/task_job.py b/pisek/task_jobs/task_job.py index 1d5da601..995e2289 100644 --- a/pisek/task_jobs/task_job.py +++ b/pisek/task_jobs/task_job.py @@ -32,9 +32,11 @@ import subprocess from pisek.utils.text import pad, pad_left from pisek.utils.terminal import MSG_LEN +from pisek.utils.util import globs_to_files from pisek.env.env import Env from pisek.utils.paths import TaskPath from pisek.utils.text import tab +from pisek.jobs.cache import GlobsToFilesArgs from pisek.jobs.jobs import State, Job, PipelineItemFailure, PipelineItemAbort from pisek.task_jobs.run_result import RunResult @@ -46,16 +48,10 @@ class TaskHelper: _env: Env def _globs_to_files( - self, globs: Iterable[str], directory: TaskPath + self, globs: Iterable[str], directory: TaskPath, exclude: Iterable[str] = () ) -> list[TaskPath]: """Get files in given directory that match any glob.""" - files_per_glob = [ - glob.glob(g, root_dir=directory.path, recursive=True, 
include_hidden=True) - for g in globs - ] - files: list[str] = sum(files_per_glob, start=[]) - files = list(sorted(set(files))) - return [TaskPath.from_abspath(directory.path, file) for file in files] + return globs_to_files(globs, directory, exclude) def _format_points(self, points: Decimal | int | None) -> str: precision = self._env.config.task.score_precision @@ -194,8 +190,8 @@ def _format_run_result( class TaskJob(Job, TaskHelper): """Job class that implements useful methods""" - def _access_dir(self, dirname: TaskPath) -> None: - for file in self._globs_to_files(["**"], dirname): + def _access_dir(self, dirname: TaskPath, exclude_paths: Iterable[str] = ()) -> None: + for file in self._globs_to_files(["**"], dirname, exclude=exclude_paths): self._access_file(file) @staticmethod @@ -310,10 +306,10 @@ def _files_equal(self, file_a: TaskPath, file_b: TaskPath) -> bool: return filecmp.cmp(file_a.path, file_b.path, shallow=False) def _globs_to_files( - self, globs: Iterable[str], directory: TaskPath + self, globs: Iterable[str], directory: TaskPath, exclude: Iterable[str] = () ) -> list[TaskPath]: - self._accessed_globs |= set(os.path.join(directory.path, g) for g in globs) - return super()._globs_to_files(globs, directory) + self._accessed_globs.add(GlobsToFilesArgs(globs, directory, exclude)) + return super()._globs_to_files(globs, directory, exclude) def _run_subprocess(self, *args, **kwargs) -> subprocess.Popen: process = subprocess.Popen(*args, **kwargs) diff --git a/pisek/utils/paths.py b/pisek/utils/paths.py index a7a454a3..07c21f71 100644 --- a/pisek/utils/paths.py +++ b/pisek/utils/paths.py @@ -4,7 +4,7 @@ # Copyright (c) 2019 - 2022 Jiří Beneš # Copyright (c) 2020 - 2022 Michal Töpfer # Copyright (c) 2022 Jiří Kalvoda -# Copyright (c) 2023 Daniel Skýpala +# Copyright (c) 2023 Daniel Skýpala # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -115,6 +115,9 @@ def 
data_path(*path: str) -> "TaskPath": def generated_path(*path: str) -> "TaskPath": return TaskPath.data_path(GENERATED_SUBDIR, *path) + def is_prefix(self, task_path: "TaskPath") -> bool: + return os.path.commonpath([self.abspath, task_path.abspath]) == self.abspath + # ----- interfaces ----- diff --git a/pisek/utils/util.py b/pisek/utils/util.py index e7e1ffb3..72791a9a 100644 --- a/pisek/utils/util.py +++ b/pisek/utils/util.py @@ -14,10 +14,12 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see <https://www.gnu.org/licenses/>. +import glob import os import shutil +from typing import Iterable -from pisek.utils.paths import BUILD_DIR, TESTS_DIR, INTERNALS_DIR +from pisek.utils.paths import BUILD_DIR, TESTS_DIR, INTERNALS_DIR, TaskPath class ChangedCWD: @@ -48,3 +50,18 @@ def clean_non_relevant_files(accessed_files: set[str]) -> None: path = os.path.join(root, file) if root in accessed_dirs and path not in accessed_files: os.remove(path) + + +def globs_to_files( + globs: Iterable[str], directory: TaskPath, exclude: Iterable[str] = () +) -> list[TaskPath]: + files_per_glob = [ + glob.glob(g, root_dir=directory.path, recursive=True, include_hidden=True) + for g in globs + ] + files = sorted(set(sum(files_per_glob, start=[]))) + task_paths = [TaskPath.from_abspath(directory.path, file) for file in files] + exclude_tp = [TaskPath.from_abspath(directory.path, path) for path in exclude] + return sorted( + tp for tp in task_paths if all(not exc_p.is_prefix(tp) for exc_p in exclude_tp) + )