diff --git a/bin/check_testing_tool.py b/bin/check_testing_tool.py
new file mode 100644
index 00000000..e0da3cf3
--- /dev/null
+++ b/bin/check_testing_tool.py
@@ -0,0 +1,241 @@
+import shutil
+import sys
+from pathlib import Path
+from typing import Optional, Sequence
+
+import config
+import parallel
+from program import Program
+from run import Submission
+from util import *
+
+if TYPE_CHECKING:  # Prevent circular import: https://stackoverflow.com/a/39757388
+    from problem import Problem
+
+"""DISCLAIMER:
+
+    This tool only exists to make checking testing tools faster.
+    You should still carefully review the code of the testing tool.
+
+    For this tool to work, the following must hold:
+    - the testing tool must be found under `attachments/testing_tool.*` (or an `attachments/testing_tool/` directory)
+    - the testing tool must be callable as `{program} -f {in_path} {submission program}`
+    - the testing tool must accept the downloadable samples as well as those found under
+      `data/testing_tool_test/` as input files
+    - the testing tool must exit with a non-zero exit code if something goes wrong
+    - the testing tool must not change the working directory
+"""
+
+
+class TestInput:
+    def __init__(self, problem: "Problem", in_path: Path, short_path: Path):
+        assert in_path.suffix in [".in", ".download", ".statement"]
+        self.problem = problem
+        self.in_path = in_path
+        self.short_path = short_path
+        if self.short_path.suffix in [".download", ".statement"]:
+            ext = self.short_path.suffix
+            name = self.short_path.with_suffix("")
+            assert name.suffix in [".in"]
+            self.name = str(name.with_suffix(ext))
+        else:
+            self.name = str(self.short_path.with_suffix(""))
+
+
+class WrappedSubmission:
+    def __init__(self, problem: "Problem", submission: Submission):
+        self.problem = problem
+        self.submission = submission
+        self.name = submission.name
+        self.tmpdir = (
+            problem.tmpdir / "testing_tool" / submission.tmpdir.relative_to(problem.tmpdir)
+        )
+        self.tmpdir.mkdir(parents=True, exist_ok=True)
+        self.run_command: Optional[list[Path | str]] = None
+
+    def supports_memory_limit(self) -> bool:
+        assert self.run_command is not None
+        assert self.submission.run_command is not None
+        return command_supports_memory_limit(self.run_command) and command_supports_memory_limit(
+            self.submission.run_command
+        )
+
+    def _wrapper_script(self) -> str:
+        assert self.submission.run_command is not None
+        args = ", ".join(map(repr, self.submission.run_command))
+        # The script assumes that the working directory is not changed.
+        script = """#!/usr/bin/env python3
+import subprocess
+import sys
+from pathlib import Path
+
+result = subprocess.run(
+    [{args}],
+    stdout=sys.stdout,
+    stderr=sys.stderr,
+    stdin=sys.stdin,
+)
+returncode_file = Path(".returncode")
+# For multipass we store the first non-zero return code.
+write_returncode = True
+if returncode_file.is_file():
+    raw = returncode_file.read_text()
+    try:
+        if int(raw) != 0:
+            write_returncode = False
+    except ValueError:
+        pass
+if write_returncode:
+    returncode_file.write_text(f"{result.returncode}\\n")
+sys.exit(result.returncode)
+"""
+        return script.replace("{args}", args)
+
+    def build(self) -> None:
+        wrapper_file = self.tmpdir / "wrapper.py"
+        wrapper_file.write_text(self._wrapper_script())
+        self.run_command = [sys.executable, wrapper_file]
+
+    def run(self, bar: ProgressBar, testing_tool: "TestingTool", testinput: TestInput) -> bool:
+        assert self.run_command is not None
+        rundir = self.tmpdir / testinput.short_path
+        if rundir.is_file():
+            rundir.unlink()
+        elif rundir.exists():
+            shutil.rmtree(rundir)
rundir.mkdir(exist_ok=True, parents=True) + + returncode_file = rundir / ".returncode" + in_path = rundir / "testcase.in" + ensure_symlink(in_path, testinput.in_path) + + localbar = bar.start(testinput) + + result = testing_tool.run(in_path, self) + submission_returncode = None + submission_status = None + if returncode_file.is_file(): + raw = returncode_file.read_text() + try: + submission_returncode = int(raw) + submission_status = default_exec_code_map(submission_returncode) + except ValueError: + pass + ok = bool(result.status) and bool(submission_status) + + message = [] + if result.status == ExecStatus.TIMEOUT: + message.append("TIMEOUT") + elif not result.status: + message.append(f"Testing Tool exit code: {result.returncode}") + if ( + submission_status is not None + and not submission_status + and submission_status != ExecStatus.TIMEOUT + ): + message.append(f"Submission exit code: {submission_returncode}") + if not message: + message.append("OK") + + data = "" + if result.out and result.err: + data = ( + "TESTING TOOL STDERR:" + + localbar._format_data(result.err) + + "\nTESTING TOOL STDOUT:" + + localbar._format_data(result.out) + + "\n" + ) + elif result.err: + data = result.err + elif result.out: + data = result.out + + localbar.done(ok, ", ".join(message), data) + return ok + + +class TestingTool(Program): + def __init__(self, problem: "Problem", path: Path): + super().__init__( + problem, + path, + "testing_tool", + limits={ + "timeout": problem.limits.timeout, + "memory": problem.limits.memory, + }, + ) + + def run(self, in_path: Path, submission: WrappedSubmission) -> ExecResult: + assert self.run_command is not None + assert submission.run_command is not None + exec_res = self._exec_command( + [*self.run_command, "-f", in_path, *submission.run_command], + cwd=in_path.parent, + crop=True, + memory=self.limits["memory"] if submission.supports_memory_limit() else None, + ) + return exec_res + + +def run( + problem: "Problem", testinputs: Sequence[TestInput], submissions: Sequence[Submission] +) -> bool: + wrapped_submissions = [WrappedSubmission(problem, submission) for submission in submissions] + for submission in wrapped_submissions: + submission.build() + + tool_dir = problem.path / "attachments" / "testing_tool" + tool_files = list((problem.path / "attachments").glob("testing_tool.*")) + if (tool_dir.is_dir() and tool_files) or len(tool_files) > 1: + error("Multiple testing tools found!") + return False + elif not tool_dir.is_dir() and not tool_files: + error("No testing tool found!") + return False + + if tool_dir.is_dir(): + testing_tool = TestingTool(problem, tool_dir) + else: + testing_tool = TestingTool(problem, tool_files[0]) + + bar = ProgressBar("Building testing tool", items=[testing_tool]) + localbar = bar.start(testing_tool) + if not testing_tool.build(bar): + localbar.done(False) + return False + localbar.done() + bar.finalize(print_done=False) + + ok = True + + max_submission_len = max([len(x.name) for x in wrapped_submissions]) + max_testinput_len = max(len(x.name) for x in testinputs) + + # When True, the ProgressBar will print a newline before the first error log. 
+ needs_leading_newline = False if config.args.verbose else True + for submission in wrapped_submissions: + bar = ProgressBar( + submission.name, + count=len(testinputs), + max_len=max_testinput_len + max_submission_len - len(submission.name), + needs_leading_newline=needs_leading_newline, + ) + cur_ok = True + + def run_submission(testinput: TestInput) -> None: + nonlocal cur_ok + # skip after first error + if not cur_ok and not config.args.all: + bar.skip() + return + if not submission.run(bar, testing_tool, testinput): + # just writing False is thread safe + cur_ok = False + + parallel.run_tasks(run_submission, testinputs, pin=True) + ok &= cur_ok + needs_leading_newline = bar.finalize() + + return ok diff --git a/bin/constraints.py b/bin/constraints.py index 73145358..40ad81bc 100644 --- a/bin/constraints.py +++ b/bin/constraints.py @@ -1,4 +1,5 @@ import re +import sys from collections import defaultdict from typing import Optional @@ -29,7 +30,7 @@ def check_validators( problem.validate_data(validate.Mode.ANSWER, constraints=ans_constraints) if not problem.settings.ans_is_output and not ans_constraints: log("No constraint validation of answer values found in answer or output validators.") - print() + print(file=sys.stderr) validator_values: set[int | float] = set() validator_defs: list[str | tuple[int | float, str, int | float]] = [] @@ -275,6 +276,7 @@ def check_constraints(problem: Problem) -> bool: print( "{:^{width}}|{:^40}".format("VALIDATORS", "PROBLEM STATEMENT", width=left_width), sep="", + file=sys.stderr, ) while statement_defs or validator_defs: @@ -292,7 +294,7 @@ def check_constraints(problem: Problem) -> bool: if val is not None: validator_defs.remove(val) if isinstance(val, str): - print("{:^{width}}".format(val, width=left_width), sep="", end="") + print("{:^{width}}".format(val, width=left_width), sep="", end="", file=sys.stderr) else: print( "{:>{value_len}_} <= {:^{name_len}} <= {:<{value_len}_}".format( @@ -300,20 +302,21 @@ def check_constraints(problem: Problem) -> bool: ), sep="", end="", + file=sys.stderr, ) else: - print("{:^{width}}".format("", width=left_width), sep="", end="") - print("|", end="") + print("{:^{width}}".format("", width=left_width), sep="", end="", file=sys.stderr) + print("|", end="", file=sys.stderr) if st is not None: languages = ",".join(statement_defs[st]) - print("{:^40} {}".format(st, languages), sep="", end="") + print("{:^40} {}".format(st, languages), sep="", end="", file=sys.stderr) else: - print("{:^40}".format(""), sep="", end="") - print() + print("{:^40}".format(""), sep="", end="", file=sys.stderr) + print(file=sys.stderr) if st is not None: statement_defs.pop(st) - print() + print(file=sys.stderr) warned = False for value in validator_values: @@ -323,7 +326,11 @@ def check_constraints(problem: Problem) -> bool: if not warned: warned = True warn("Values in validators but missing in some statement:") - print(f"{Fore.YELLOW}{value}{Style.RESET_ALL} missing in", ",".join(missing)) + print( + f"{Fore.YELLOW}{value}{Style.RESET_ALL} missing in", + ",".join(missing), + file=sys.stderr, + ) extra_in_statement = set(statement_values.keys()).difference(validator_values) if extra_in_statement: @@ -332,6 +339,7 @@ def check_constraints(problem: Problem) -> bool: print( f"{Fore.YELLOW}{value}{Style.RESET_ALL} in", ",".join(sorted(statement_values[value])), + file=sys.stderr, ) return True diff --git a/bin/generate.py b/bin/generate.py index 7dec303a..8279e09f 100644 --- a/bin/generate.py +++ b/bin/generate.py @@ -661,6 +661,9 @@ def 
validate_in(t, problem: Problem, testcase: Testcase, meta_yaml: dict, bar: P infile = problem.tmpdir / "data" / t.hash / "testcase.in" assert infile.is_file() + if testcase.root == "testing_tool_test": + return True + input_validator_hashes = testcase.validator_hashes(validate.InputValidator, bar) if all(h in meta_yaml["input_validator_hashes"] for h in input_validator_hashes): return True @@ -705,7 +708,7 @@ def validate_ans_and_out( infile = problem.tmpdir / "data" / t.hash / "testcase.in" assert infile.is_file() - if testcase.root == "invalid_input": + if testcase.root in ["invalid_input", "testing_tool_test"]: return True ansfile = infile.with_suffix(".ans") @@ -939,7 +942,11 @@ def generate_from_rule(): def generate_from_solution(testcase: Testcase, bar: ProgressBar): nonlocal meta_yaml - if testcase.root in [*config.INVALID_CASE_DIRECTORIES, "valid_output"]: + if testcase.root in [ + *config.INVALID_CASE_DIRECTORIES, + "valid_output", + "testing_tool_test", + ]: return True if config.args.no_solution: return True @@ -1021,6 +1028,8 @@ def generate_visualization(testcase: Testcase, bar: ProgressBar): if testcase.root in config.INVALID_CASE_DIRECTORIES: return True + if testcase.root == "testing_tool_test": + return True if config.args.no_visualizer: return True @@ -1182,6 +1191,7 @@ def add_test_case_to_cache(): # consider specific files for the uniqueness of this testcase relevant_files = { + "testing_tool_test": [".in"], "invalid_input": [".in"], "invalid_answer": [".in", ".ans"], "invalid_output": [".in", ".ans", ".out"], @@ -2155,6 +2165,8 @@ def reorder(self): warn(f"{d} is used for invalid test data. Skipping.") elif parts[0] == "valid_output": warn(f"{d} is used for valid test data. Skipping.") + elif parts[0] == "testing_tool_test": + warn(f"{d} is used to test the testing tool. Skipping.") elif path not in self.known_directories: warn(f"{d} is not a generated directory. Skipping.") elif not self.known_directories[path].numbered: diff --git a/bin/problem.py b/bin/problem.py index f0ef73f2..19fe087b 100644 --- a/bin/problem.py +++ b/bin/problem.py @@ -17,6 +17,7 @@ import parallel import run import testcase +import check_testing_tool import validate import validator_tests import verdicts @@ -214,7 +215,7 @@ def __init__( if config.args.timeout: self.validation_time = self.generator_time = self.visualizer_time = config.args.timeout if config.args.memory: - self.memory = self.validation_memory = config.args.memory + self.memory = self.compilation_memory = self.validation_memory = config.args.memory class ProblemSettings: @@ -351,7 +352,7 @@ def __init__(self, path: Path, tmpdir: Path, label: Optional[str] = None): # Some caches. self._testcases = dict[ - tuple[Optional[validate.Mode], bool, bool], list[testcase.Testcase] + tuple[Optional[validate.Mode], bool, bool, bool], list[testcase.Testcase] ]() self._submissions: Optional[list[run.Submission] | Literal[False]] = None self._validators_cache = dict[ # The "bool" is for "check_constraints" @@ -629,17 +630,17 @@ def testcases( mode: Optional[validate.Mode] = None, needans=True, only_samples=False, + testing_tool_test=False, ) -> Sequence[testcase.Testcase]: only_samples = config.args.samples or only_samples - key = (mode, needans, only_samples) + key = (mode, needans, only_samples, testing_tool_test) if key in p._testcases is not None: return p._testcases[key] in_paths = None if config.args.testcases: - if only_samples: - assert False + assert not only_samples # Deduplicate testcases with both .in and .ans. 
in_paths = [] for t in config.args.testcases: @@ -654,6 +655,8 @@ def testcases( in_paths = list(set(in_paths)) elif mode is not None: + assert not only_samples + assert not testing_tool_test assert needans in_paths = [] for prefix in { @@ -663,6 +666,8 @@ def testcases( validate.Mode.VALID_OUTPUT: ["secret", "sample", "valid_output"], }[mode]: in_paths += glob(p.path, f"data/{prefix}/**/*.in") + elif testing_tool_test: + in_paths = list(glob(p.path, "data/testing_tool_test/**/*.in")) else: in_paths = list(glob(p.path, "data/sample/**/*.in")) if not only_samples: @@ -702,7 +707,7 @@ def testcases( testcases.append(t) testcases.sort(key=lambda t: t.name) - if len(testcases) == 0: + if len(testcases) == 0 and not testing_tool_test: ans = ( " with answer" if needans and mode not in [validate.Mode.INVALID, validate.Mode.VALID_OUTPUT] @@ -1010,7 +1015,7 @@ def _validators( paths = list(glob(problem.path / cls.source_dir, "*")) # TODO: Instead of checking file contents, maybe specify this in generators.yaml? - def has_constraints_checking(f): + def has_constraints_checking(f: Path) -> bool: if not f.is_file(): return False try: @@ -1042,7 +1047,7 @@ def has_constraints_checking(f): ] bar = ProgressBar(f"Building {cls.validator_type} validator", items=validators) - def build_program(p): + def build_program(p: "Program") -> None: localbar = bar.start(p) p.build(localbar) localbar.done() @@ -1054,7 +1059,9 @@ def build_program(p): return validators # get all testcases and submissions and prepare the output validator and visualizer - def prepare_run(problem): + def prepare_run( + problem, + ) -> Literal[False] | tuple[Sequence[testcase.Testcase], Sequence[run.Submission]]: testcases = problem.testcases() if not testcases: return False @@ -1074,7 +1081,9 @@ def prepare_run(problem): return testcases, submissions @staticmethod - def run_some(testcases, submissions): + def run_some( + testcases: Sequence[testcase.Testcase], submissions: Sequence[run.Submission] + ) -> tuple[bool, verdicts.VerdictTable]: max_submission_len = max([len(x.name) for x in submissions]) ok = True @@ -1093,7 +1102,7 @@ def run_some(testcases, submissions): return ok, verdict_table # called by bt run - def run_submissions(problem): + def run_submissions(problem) -> bool: ts_pair = problem.prepare_run() if not ts_pair: return False @@ -1119,7 +1128,7 @@ def run_submissions(problem): # Instead of validating the output, this function just prints all output to the # terminal. # Note: The CLI only accepts one submission. - def test_submissions(problem): + def test_submissions(problem) -> bool: submissions = problem.submissions() if submissions is False: return False @@ -1132,16 +1141,18 @@ def test_submissions(problem): return True @staticmethod - def _print_table(verdict_table, testcases): + def _print_table( + verdict_table: Sequence[verdicts.Verdicts], testcases: Sequence[testcase.Testcase] + ) -> None: # Begin by aggregating bitstrings for all testcases, and find bitstrings occurring often (>=config.TABLE_THRESHOLD). 
- def single_verdict(row, testcase): + def single_verdict(row: verdicts.Verdicts, testcase: testcase.Testcase) -> str: assert row[testcase.name] is not None if row[testcase.name] is not False: return verdicts.to_char(row[testcase.name]) else: return f"{Style.DIM}-{Style.RESET_ALL}" - def make_verdict(tc): + def make_verdict(tc: testcase.Testcase) -> str: return "".join(map(lambda row: single_verdict(row, tc), verdict_table)) resultant_count, resultant_id = dict[str, int](), dict[str, int]() @@ -1214,11 +1225,36 @@ def make_verdict(tc): print(str.format("(Type {})", resultant_id[resultant]), end="", file=sys.stderr) print(end="\n", file=sys.stderr) - def reset_testcase_hashes(self): - self._testcase_hashes = {} + # called by bt check_testing_tool + def check_testing_tool(problem) -> bool: + testcases = problem.testcases(needans=False, testing_tool_test=True) + testinputs = [ + check_testing_tool.TestInput(problem, t.in_path, t.short_path) for t in testcases + ] + if not config.args.testcases: + sampleinputs = [] + for in_path, _ in problem.download_samples(): + sample = check_testing_tool.TestInput( + problem, in_path, in_path.relative_to(problem.path / "data") + ) + if sample not in testinputs: + sampleinputs.append(sample) + testinputs = sampleinputs + testinputs + if not testinputs: + warn( + f"Didn't find any testcases to run the testing tool in problem {problem.name}. Skipping." + ) + return False + submissions = problem.selected_or_accepted_submissions() + if not submissions: + return False + return check_testing_tool.run(problem, testinputs, submissions) + + def reset_testcase_hashes(self) -> None: + self._testcase_hashes: dict[str, testcase.Testcase] = {} # Returns None for new testcases or the Testcase object it equals. - def matches_existing_testcase(self, t): + def matches_existing_testcase(self, t: testcase.Testcase) -> Optional[testcase.Testcase]: hashes = {} relevant_files = { "invalid_input": ["in"], @@ -1547,7 +1583,7 @@ def get_slowest(result): limits["time_limit"] = problem.limits.time_limit write_yaml(problem_yaml, problem.path / "problem.yaml") - print() + print(file=sys.stderr) message(f"{duration:.3f}s @ {testcase} ({submission})", "slowest AC") message( f"{problem.limits.time_limit}s >= {duration:.3f}s * {problem.limits.ac_to_time_limit}", @@ -1561,13 +1597,13 @@ def get_slowest(result): f"{problem.limits.timeout}s >= {problem.limits.time_limit}s * {problem.limits.time_limit_to_tle}²", "timeout", ) - print() + print(file=sys.stderr) submission, testcase, duration = run_all( lambda vs: vs == [verdicts.Verdict.TIME_LIMIT_EXCEEDED], min ) if submission is not None: - print() + print(file=sys.stderr) message(f"{duration:.3f}s @ {testcase} ({submission})", "fastest TLE") if duration <= problem.limits.time_limit: error("TLE submission runs within time limit") @@ -1575,7 +1611,7 @@ def get_slowest(result): warn("TLE submission runs within safety margin") elif duration >= problem.limits.timeout: log(f"No TLE submission finished within {problem.limits.timeout}s") - print() + print(file=sys.stderr) else: log("No TLE submissions found") diff --git a/bin/program.py b/bin/program.py index cde1633d..7786a3c1 100644 --- a/bin/program.py +++ b/bin/program.py @@ -106,7 +106,7 @@ def sanitizer(): # After build() has been called, the following are available: # - run_command: command to be executed. E.g. ['/path/to/run'] or ['python3', '/path/to/main.py']. `None` if something failed. # -# build() will return the (run_command, message) pair. 
+# build() will return True if building was successful.
 class Program:
     input_files: list[Path]  # Populated in Program.build
 
@@ -181,18 +181,18 @@ def __init__(
 
     # is file at path executable
     @staticmethod
-    def _is_executable(path):
-        return path.is_file() and (
-            path.stat().st_mode & (stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH)
+    def _is_executable(path: Path) -> bool:
+        return bool(
+            path.is_file() and (path.stat().st_mode & (stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH))
         )
 
     # Returns true when file f matches the given shebang regex.
     @staticmethod
-    def _matches_shebang(f, shebang):
+    def _matches_shebang(f: Path, shebang: Optional[re.Pattern]) -> bool:
         if shebang is None:
             return True
         with f.open() as o:
-            return shebang.search(o.readline())
+            return shebang.search(o.readline()) is not None
 
     # Do not warn for the same fallback language multiple times.
     warn_cache: set[str] = set()
@@ -200,7 +200,7 @@ def _matches_shebang(f, shebang):
     language: Optional[str]
 
     # Sets self.language and self.env['mainfile']
-    def _get_language(self, bar: ProgressBar):
+    def _get_language(self, bar: ProgressBar) -> bool:
         fallback = False
         candidates = []
         for lang in languages():
@@ -300,7 +300,7 @@ def _get_language(self, bar: ProgressBar):
             bar.error(f"No language detected for {self.path}.")
             return False
 
-    def _checks(self, bar: ProgressBar):
+    def _checks(self, bar: ProgressBar) -> None:
         for f in self.source_files:
             if f.stat().st_size >= config.ICPC_FILE_LIMIT * 1024**2:
                 bar.warn(
@@ -367,7 +367,7 @@ def _checks(self, bar: ProgressBar):
             pass
 
     # Return True on success.
-    def _compile(self, bar: ProgressBar):
+    def _compile(self, bar: ProgressBar) -> bool:
         meta_path = self.tmpdir / "meta_.yaml"
 
         # Remove all non-source files.
@@ -415,7 +415,7 @@ def _compile(self, bar: ProgressBar):
         return True
 
     # Return True on success, False on failure.
-    def build(self, bar: ProgressBar):
+    def build(self, bar: ProgressBar) -> bool:
         assert not self.built
         self.built = True
 
@@ -527,7 +527,7 @@ def _exec_command(self, *args, **kwargs) -> ExecResult:
         return exec_command(*args, **kwargs)
 
     @staticmethod
-    def add_callback(problem, path, c):
+    def add_callback(problem: "Problem", path: Path, c: Callable[["Program"], Any]):
         if path not in problem._program_callbacks:
             problem._program_callbacks[path] = []
         problem._program_callbacks[path].append(c)
@@ -547,7 +547,7 @@ def __init__(self, problem: "Problem", path: Path, **kwargs):
     # Run the generator in the given working directory.
     # May write files in |cwd| and stdout is piped to {name}.in if it's not written already.
     # Returns ExecResult. Success when result.status == ExecStatus.ACCEPTED.
- def run(self, bar, cwd, name, args=[]): + def run(self, bar: ProgressBar, cwd: Path, name: str, args: list[str] = []) -> ExecResult: assert self.run_command is not None in_path = cwd / (name + ".in") diff --git a/bin/run.py b/bin/run.py index c8d27039..4707fdd4 100644 --- a/bin/run.py +++ b/bin/run.py @@ -483,10 +483,7 @@ def process_run(run: Run): localbar.item_width = padding_len localbar.done(got_expected, message, data, print_item=False) - p = parallel.new_queue(process_run, pin=True) - for run in runs: - p.put(run) - p.done() + parallel.run_tasks(process_run, runs, pin=True) self.verdict = verdicts["."] assert isinstance(self.verdict, Verdict), "Verdict of root must not be empty" diff --git a/bin/stats.py b/bin/stats.py index 9f0740d7..ba357d12 100644 --- a/bin/stats.py +++ b/bin/stats.py @@ -278,7 +278,7 @@ def loc(file: Path) -> Optional[int]: content = file.read_text() lexer = lexers.guess_lexer_for_filename(file, content) assert isinstance(lexer, pygments.lexer.Lexer) - language = lexer.name.lower() + language = getattr(lexer, "name").lower() tokens = lexer.get_tokens(content) count = 0 diff --git a/bin/tools.py b/bin/tools.py index b1e5d0b9..fbf5dd96 100755 --- a/bin/tools.py +++ b/bin/tools.py @@ -288,7 +288,7 @@ def split_submissions_and_testcases(s: list[Path]) -> tuple[list[Path], list[Pat submissions = [] testcases = [] for p in s: - testcase_dirs = ["data", "sample", "secret", "fuzz"] + testcase_dirs = ["data", "sample", "secret", "fuzz", "testing_tool_cases"] if ( any(part in testcase_dirs for part in p.parts) or p.suffix in config.KNOWN_DATA_EXTENSIONS @@ -817,6 +817,35 @@ def build_parser() -> SuppressingParser: help="Override the default timeout. Default: 1.5 * time_limit + 1.", ) + checktestingtool = subparsers.add_parser( + "check_testing_tool", + parents=[global_parser], + help="Run testing_tool against some or all accepted submissions.", + ) + checktestingtool.add_argument( + "submissions", + nargs="*", + type=Path, + help="optionally supply a list of programs and testcases to run", + ) + checktestingtool.add_argument( + "--no-generate", + "-G", + action="store_true", + help="Do not run `generate` before running submissions.", + ) + checktestingtool.add_argument( + "--timeout", + type=int, + help="Override the default timeout. Default: 1.5 * time_limit + 1.", + ) + checktestingtool.add_argument( + "--all", + "-a", + action="store_true", + help="Run all testcases and don't stop on error.", + ) + # Sort subparsers.add_parser( "sort", parents=[global_parser], help="sort the problems for a contest by name" @@ -1087,7 +1116,7 @@ def run_parsed_arguments(args: argparse.Namespace, personal_config: bool = True) problems, tmpdir = get_problems(problem_dir) # Split submissions and testcases when needed. - if action in ["run", "fuzz", "time_limit"]: + if action in ["run", "fuzz", "time_limit", "check_testing_tool"]: if config.args.submissions: config.args.submissions, config.args.testcases = split_submissions_and_testcases( config.args.submissions @@ -1234,7 +1263,10 @@ def run_parsed_arguments(args: argparse.Namespace, personal_config: bool = True) if action in ["generate"]: success &= generate.generate(problem) - if action in ["all", "constraints", "run", "time_limit"] and not config.args.no_generate: + if ( + action in ["all", "constraints", "run", "time_limit", "check_testing_tool"] + and not config.args.no_generate + ): # Call `generate` with modified arguments. 
old_args = argparse.Namespace(**vars(config.args)) config.args.jobs = (os.cpu_count() or 1) // 2 @@ -1295,6 +1327,8 @@ def run_parsed_arguments(args: argparse.Namespace, personal_config: bool = True) success &= problem.test_submissions() if action in ["constraints"]: success &= constraints.check_constraints(problem) + if action in ["check_testing_tool"]: + problem.check_testing_tool() if action in ["time_limit"]: success &= problem.determine_time_limit() if action in ["zip"]: diff --git a/bin/util.py b/bin/util.py index eb584f6c..991b8f7c 100644 --- a/bin/util.py +++ b/bin/util.py @@ -1235,6 +1235,11 @@ def __init__( self.pass_id = pass_id +def command_supports_memory_limit(command: Sequence[str | Path]) -> bool: + # https://bugs.openjdk.org/browse/JDK-8071445 + return Path(command[0]).name not in ["java", "javac", "kotlin", "kotlinc", "sbcl"] + + def limit_setter( command: Optional[Sequence[str | Path]], timeout: Optional[int], @@ -1242,35 +1247,45 @@ def limit_setter( group: Optional[int] = None, cores: Literal[False] | list[int] = False, ) -> Callable[[], None]: + # perform all syscalls / things that could fail in the current context, i.e., outside of the preexec_fn + disable_stack_limit = not is_bsd() + + if config.args.memory: + memory_limit = config.args.memory if memory_limit: + memory_limit *= 1024**2 assert command is not None - jvm = Path(command[0]).name in ["java", "javac", "kotlin", "kotlinc"] + if not command_supports_memory_limit(command): + memory_limit = None + if config.args.sanitizer or is_bsd() or is_windows(): + memory_limit = None if group is not None: assert not is_windows() assert not is_mac() + if not is_windows() and not is_bsd(): + cores = False + + # actual preexec_fn called in the context of the new process + # this should only do resource and os calls to stay safe def setlimits() -> None: - if timeout: + if timeout is not None: resource.setrlimit(resource.RLIMIT_CPU, (timeout + 1, timeout + 1)) # Increase the max stack size from default to the max available. - if not is_bsd(): + if disable_stack_limit: resource.setrlimit( resource.RLIMIT_STACK, (resource.RLIM_INFINITY, resource.RLIM_INFINITY) ) - if memory_limit and not jvm and not is_bsd(): - resource.setrlimit( - resource.RLIMIT_AS, - (memory_limit * 1024**2, memory_limit * 1024**2), - ) + if memory_limit is not None: + resource.setrlimit(resource.RLIMIT_AS, (memory_limit, memory_limit)) - # TODO: with python 3.11 it is better to use Popen(process_group=group) if group is not None: os.setpgid(0, group) - if cores is not False and not is_windows() and not is_bsd(): + if cores is not False: os.sched_setaffinity(0, cores) # Disable coredumps. 
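
As an aside on the `limit_setter` refactor above: the point of the change is that the `preexec_fn` body is reduced to plain `resource`/`os` calls, with config lookups and platform checks resolved before the fork. The following standalone sketch illustrates that pattern only; it is not code from this PR, and the 256 MiB figure is an arbitrary example value.

```python
import resource
import subprocess
import sys

MEMORY_BYTES = 256 * 1024**2  # hypothetical limit, chosen only for illustration


def set_limits() -> None:
    # Runs in the forked child between fork() and exec();
    # keep it to simple resource/os calls, as limit_setter does.
    resource.setrlimit(resource.RLIMIT_AS, (MEMORY_BYTES, MEMORY_BYTES))


# POSIX only: preexec_fn is not available on Windows.
subprocess.run([sys.executable, "-c", "print('hello')"], preexec_fn=set_limits, check=True)
```
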
@@ -1344,7 +1359,7 @@ def exec_command(
     command: Sequence[str | Path],
     exec_code_map: Callable[[int], ExecStatus] = default_exec_code_map,
     crop: bool = True,
-    preexec_fn: bool | Callable[[], None] = True,
+    preexec_fn: bool = True,
     **kwargs: Any,
 ) -> ExecResult:
     # By default: discard stdout, return stderr
@@ -1368,21 +1383,13 @@ def exec_command(
 
     timeout: Optional[int] = None
     if "timeout" in kwargs:
-        if kwargs["timeout"] is None:
-            timeout = None
-        elif kwargs["timeout"]:
-            timeout = kwargs["timeout"]
+        timeout = kwargs["timeout"]
         kwargs.pop("timeout")
 
     memory: Optional[int] = None
     if "memory" in kwargs:
-        if kwargs["memory"] is not None:
-            memory = kwargs["memory"]
+        memory = kwargs["memory"]
         kwargs.pop("memory")
-    if config.args.memory:
-        memory = config.args.memory
-    if is_windows() or config.args.sanitizer:
-        memory = None
 
     process: Optional[ResourcePopen] = None
     old_handler = None
@@ -1401,7 +1408,7 @@ def interrupt_handler(sig: Any, frame: Any) -> None:
 
     tstart = time.monotonic()
     try:
-        if not is_windows() and preexec_fn not in [False, None]:
+        if not is_windows() and preexec_fn is not False:
             process = ResourcePopen(
                 command,
                 preexec_fn=limit_setter(command, timeout, memory),
diff --git a/bin/verdicts.py b/bin/verdicts.py
index c4996d2c..6a4a5f92 100644
--- a/bin/verdicts.py
+++ b/bin/verdicts.py
@@ -4,7 +4,7 @@
 import threading
 from enum import Enum
 from pathlib import Path
-from typing import Literal, TYPE_CHECKING
+from typing import Literal, Sequence, TYPE_CHECKING
 
 from colorama import Fore, Style
 
@@ -168,7 +168,7 @@ class Verdicts:
 
     def __init__(
         self,
-        test_cases_list: list[testcase.Testcase],
+        test_cases_list: Sequence[testcase.Testcase],
         timeout: int,
         run_until: RunUntil = RunUntil.FIRST_ERROR,
     ):
@@ -380,7 +380,7 @@ def __iter__(self):
     def __init__(
         self,
         submissions,
-        test_cases: list[testcase.Testcase],
+        test_cases: Sequence[testcase.Testcase],
         width: int = ProgressBar.columns,
         height: int = shutil.get_terminal_size().lines,
         max_name_width: int = 50,
diff --git a/doc/commands.md b/doc/commands.md
index a547fc78..6bff0491 100644
--- a/doc/commands.md
+++ b/doc/commands.md
@@ -26,6 +26,7 @@ This lists all subcommands and their most important options.
   - [`bt output [-v] [testcases [testcases ...]]`](#output)
   - [`bt validate [-v] [--input | --answer | --invalid | --valid-output | --generic [TYPE]] [--remove | --move-to DIR] [testcases [testcases ...]]`](#validate)
   - [`bt constraints [-v]`](#constraints)
+  - [`bt check_testing_tool [submissions [submissions ...]] [testcases [testcases ...]]`](#check_testing_tool)
 - Creating new contest/problems
   - [`bt new_contest [contestname]`](#new_contest)
   - [`bt new_problem [problemname] [--author AUTHOR] [--type {pass-fail,float,custom,interactive,...}] [--defaults] [--skel SKEL]`](#new_problem)
@@ -334,6 +335,24 @@ This output will look like:
 |               a_i              1
 ```
+
+## `check_testing_tool`
+
+`bt check_testing_tool` runs the testing tool against some or all accepted submissions to check that it works properly.
+However, this check has many caveats and should never replace a careful manual review of the testing tool.
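
As an illustration only (this skeleton is not part of this PR or of any problem package), a tool satisfying the documented contract accepts `-f <input file>` followed by the submission invocation, feeds the input to the submission, and exits non-zero when it detects a problem:

```python
#!/usr/bin/env python3
"""Minimal sketch of the interface `bt check_testing_tool` expects (illustrative only)."""
import argparse
import subprocess
import sys

parser = argparse.ArgumentParser()
parser.add_argument("-f", dest="inputfile", required=True, type=argparse.FileType("r"))
parser.add_argument("program", nargs="+", help="invocation of the submission to test")
args = parser.parse_args()

with args.inputfile as f:
    testdata = f.read()

# Feed the test input to the submission; a real tool would interact with it and verify its output.
result = subprocess.run(args.program, input=testdata, capture_output=True, text=True)

if result.returncode != 0 or not result.stdout.strip():
    print("submission failed or produced no output", file=sys.stderr)
    sys.exit(1)  # a non-zero exit code signals the failure to `bt check_testing_tool`
```
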
+
+**Caveats**
+- the testing tool must be found under `attachments/testing_tool.*` (or an `attachments/testing_tool/` directory)
+- the testing tool must be callable as `{program} -f {in_path} {submission program}`
+- the testing tool must accept the downloadable samples as well as files matching `data/testing_tool_test/*.in` as input files
+- the testing tool must exit with a non-zero exit code if something goes wrong
+- the testing tool must not change the working directory
+
+**Flags**
+
+- `--timeout <timeout>`: Override the default timeout.
+- `--all`/`-a`: Run all testcases and don't stop after the first error.
+- `--no-generate`/`-G`: Do not generate testcases before running. This usually won't be needed since checking that generated testcases are up to date is fast.
 
 # Creating a new contest/problem
 
 ## `new_contest`
diff --git a/test/problems/alternativeencryption/attachments/testing_tool.py b/test/problems/alternativeencryption/attachments/testing_tool.py
new file mode 100755
index 00000000..caaf4297
--- /dev/null
+++ b/test/problems/alternativeencryption/attachments/testing_tool.py
@@ -0,0 +1,126 @@
+#!/usr/bin/env python3
+#
+# Testing tool for the Alternative Encryption problem
+#
+# Usage:
+#
+# python3 testing_tool.py -f inputfile <program>
+#
+#
+# Use the -f parameter to specify the input file, e.g. 1.in.
+# The input file should contain the following:
+# - The first line contains "encrypt".
+# - The second line contains an integer n, the number of strings.
+# - The following n lines each contain one string to encrypt.
+
+# You can compile and run your solution as follows:
+
+# C++:
+# g++ solution.cpp
+# python3 testing_tool.py -f 1.in ./a.out
+
+# Python:
+# python3 testing_tool.py -f 1.in python3 ./solution.py
+
+# Java:
+# javac solution.java
+# python3 testing_tool.py -f 1.in java solution
+
+# Kotlin:
+# kotlinc solution.kt
+# python3 testing_tool.py -f 1.in kotlin solutionKt
+
+
+# The tool is provided as-is, and you should feel free to make
+# whatever alterations or augmentations you like to it.
+#
+# The tool attempts to detect and report common errors, but it is not an exhaustive test.
+# It is not guaranteed that a program that passes this testing tool will be accepted.
+
+
+import argparse
+import subprocess
+import traceback
+
+parser = argparse.ArgumentParser(description="Testing tool for problem Alternative Encryption.")
+parser.add_argument(
+    "-f",
+    dest="inputfile",
+    metavar="inputfile",
+    default=None,
+    type=argparse.FileType("r"),
+    required=True,
+    help="The input file to use.",
+)
+parser.add_argument("program", nargs="+", help="Invocation of your solution")
+
+args = parser.parse_args()
+
+
+def single_pass(action: str, words: list[str]) -> list[str]:
+    with (
+        subprocess.Popen(
+            " ".join(args.program),
+            shell=True,
+            stdout=subprocess.PIPE,
+            stdin=subprocess.PIPE,
+            universal_newlines=True,
+        ) as p,
+    ):
+        assert p.stdin is not None and p.stdout is not None
+
+        raw = "\n".join([action, str(len(words)), *words])
+        (stdout, stderr) = p.communicate(input=raw)
+        output = [line.strip() for line in stdout.strip().split("\n") if line.strip()]
+
+        assert len(output) == len(words), (
+            f"Your submission printed {len(output)} words, expected {len(words)} words."
+ ) + print(f"{action} exit code: {p.returncode}") + print(f"{action} output:") + print() + print(stdout, flush=True) + + for word_a, word_b in zip(words, output): + assert len(word_a) == len(word_b), ( + f"Your submission changed the length of '{word_a}', you printed '{word_b}'" + ) + + for i, (char_a, char_b) in enumerate(zip(word_a, word_b), start=1): + assert char_a != char_b, ( + f"Letter at position {i} ({char_a}) is the same: '{word_a}' => '{word_b}'" + ) + + return output + + +try: + with args.inputfile as f: + # Parse input + lines = [line.strip() for line in f.readlines()] + action = lines[0] + n = int(lines[1]) + words = lines[2:] + + assert action == "encrypt", f"Initial action must be 'encrypt', but got {action}" + + encrypted = single_pass("encrypt", words) + decrypted = single_pass("decrypt", encrypted) + + for expected, got in zip(words, decrypted): + assert expected == got, f"Got decrypted word '{got}', expected '{expected}'" + + print("Success.") + +except AssertionError as e: + print() + print(f"Error: {e}") + print() + exit(1) + +except Exception: + print() + print("Unexpected error:") + traceback.print_exc() + print() + exit(1) diff --git a/test/problems/alternativeencryption/data/sample/001.ans b/test/problems/alternativeencryption/data/sample/01.ans similarity index 100% rename from test/problems/alternativeencryption/data/sample/001.ans rename to test/problems/alternativeencryption/data/sample/01.ans diff --git a/test/problems/alternativeencryption/data/sample/001.in b/test/problems/alternativeencryption/data/sample/01.in similarity index 100% rename from test/problems/alternativeencryption/data/sample/001.in rename to test/problems/alternativeencryption/data/sample/01.in diff --git a/test/problems/alternativeencryption/data/sample/001.interaction b/test/problems/alternativeencryption/data/sample/01.interaction similarity index 100% rename from test/problems/alternativeencryption/data/sample/001.interaction rename to test/problems/alternativeencryption/data/sample/01.interaction diff --git a/test/problems/alternativeencryption/generators/generators.yaml b/test/problems/alternativeencryption/generators/generators.yaml index 2d4f34a5..c63dc032 100644 --- a/test/problems/alternativeencryption/generators/generators.yaml +++ b/test/problems/alternativeencryption/generators/generators.yaml @@ -31,4 +31,4 @@ data: - random: generate: eval.py {seed} 1000 randstr(randrange(1, 101)) - count: 100 + count: 10 diff --git a/test/problems/alternativeencryption/submissions/accepted/Paul.kt b/test/problems/alternativeencryption/submissions/accepted/Paul.kt deleted file mode 100644 index bd8e05c3..00000000 --- a/test/problems/alternativeencryption/submissions/accepted/Paul.kt +++ /dev/null @@ -1,8 +0,0 @@ -import java.util.* - -fun main() { - readln() - for (i in 1..readln().toInt()) { - println(readln().trim().map { (((it.code - 1) xor 1) + 1).toChar() }.joinToString("")) - } -} diff --git a/test/test_problems.py b/test/test_problems.py index 4f0b7eee..1a372384 100644 --- a/test/test_problems.py +++ b/test/test_problems.py @@ -21,6 +21,7 @@ "interactivemultipass", "multipass", "constants", + "alternativeencryption", ] + ["hellounix" if not util.is_mac() and not util.is_windows() else []] RUN_DIR = Path.cwd().absolute() @@ -42,6 +43,28 @@ def test_problem(self): tools.test(["run"]) +@pytest.fixture(scope="class") +def setup_alternativeencryption_problem(request): + problem_dir = RUN_DIR / "test/problems/alternativeencryption" + os.chdir(problem_dir) + try: + tools.test(["tmp", 
"--clean"]) + yield + finally: + tools.test(["tmp", "--clean"]) + os.chdir(RUN_DIR) + + +@pytest.mark.usefixtures("setup_alternativeencryption_problem") +class TestAlternativeencryptionProblem: + def test_check_testing_tool(self): + tools.test(["check_testing_tool"]) + + def test_bad_check_testing_tool(self): + with pytest.raises(SystemExit): + tools.test(["check_testing_tool", "submissions/wrong_answer/no-change.py"]) + + @pytest.fixture(scope="class") def setup_constants_problem(request): problem_dir = RUN_DIR / "test/problems/constants"