diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index b734a1668..d1672aaad 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -16,6 +16,7 @@ jobs: container: ragnargrootkoerkamp/bapctools steps: - uses: actions/checkout@v4 + - run: git config --global --add safe.directory "$(pwd)" - run: bash test/yaml/generators/test_schemata.sh - run: pytest @@ -50,5 +51,7 @@ jobs: lmodern texlive-science latexmk + texlive-lang-german + asymptote - shell: wsl-bash {0} run: pytest diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index cfcb3694c..86847e8ba 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,9 +7,9 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.9.9 hooks: - - id: ruff-format - id: ruff args: [ --fix ] + - id: ruff-format - repo: https://github.com/pre-commit/mirrors-mypy rev: v1.15.0 hooks: diff --git a/bin/config.py b/bin/config.py index ab1ce2347..b10b0dfef 100644 --- a/bin/config.py +++ b/bin/config.py @@ -5,7 +5,9 @@ import re from pathlib import Path from collections.abc import Mapping, Sequence -from typing import Final, Literal, Optional +from typing import Any, Final, Literal, Optional + +SPEC_VERSION: Final[str] = "2023-07-draft" # return values RTV_AC: Final[int] = 42 @@ -32,9 +34,20 @@ # When --table is set, this threshold determines the number of identical profiles needed to get flagged. TABLE_THRESHOLD: Final[int] = 4 -FILE_NAME_REGEX: Final[str] = "[a-zA-Z0-9][a-zA-Z0-9_.-]*[a-zA-Z0-9]" +FILE_NAME_REGEX: Final[str] = "[a-zA-Z0-9][a-zA-Z0-9_.-]{0,253}[a-zA-Z0-9]" COMPILED_FILE_NAME_REGEX: Final[re.Pattern[str]] = re.compile(FILE_NAME_REGEX) +CONSTANT_NAME_REGEX = "[a-zA-Z_][a-zA-Z0-9_]*" +COMPILED_CONSTANT_NAME_REGEX: Final[re.Pattern[str]] = re.compile(CONSTANT_NAME_REGEX) +CONSTANT_SUBSTITUTE_REGEX: Final[re.Pattern[str]] = re.compile( + f"\\{{\\{{({CONSTANT_NAME_REGEX})\\}}\\}}" +) + +BAPCTOOLS_SUBSTITUTE_REGEX: Final[re.Pattern[str]] = re.compile( + f"\\{{%({CONSTANT_NAME_REGEX})%\\}}" +) + + KNOWN_TESTCASE_EXTENSIONS: Final[Sequence[str]] = [ ".in", ".ans", @@ -48,14 +61,18 @@ ".pdf", ] +KNOWN_SAMPLE_TESTCASE_EXTENSIONS: Final[Sequence[str]] = [ + ".in.statement", + ".ans.statement", + ".in.download", + ".ans.download", +] + KNOWN_TEXT_DATA_EXTENSIONS: Final[Sequence[str]] = [ *KNOWN_TESTCASE_EXTENSIONS, + *KNOWN_SAMPLE_TESTCASE_EXTENSIONS, ".interaction", - ".hint", - ".desc", - ".in.statement", - ".ans.statement", - #'.args', + ".yaml", ] KNOWN_DATA_EXTENSIONS: Final[Sequence[str]] = [ @@ -67,7 +84,6 @@ "invalid_input", "invalid_answer", "invalid_output", - "bad", ] @@ -86,11 +102,12 @@ args = argparse.Namespace() -DEFAULT_ARGS: Final[Mapping] = { +DEFAULT_ARGS: Final[Mapping[str, Any]] = { "jobs": (os.cpu_count() or 1) // 2, "time": 600, # Used for `bt fuzz` "verbose": 0, - "languages": None, + "action": None, + "no_visualizer": True, } @@ -101,7 +118,7 @@ grep -Ev '^(h|jobs|time|verbose)$' | sed 's/^/"/;s/$/",/' | tr '\n' ' ' | sed 's/^/ARGS_LIST: Final[Sequence[str]] = [/;s/, $/]\n/' """ # fmt: off -ARGS_LIST: Final[Sequence[str]] = ["1", "add", "all", "answer", "api", "author", "check_deterministic", "clean", "colors", "contest", "contest_id", "contestname", "cp", "defaults", "default_solution", "depth", "directory", "error", "force", "force_build", "generic", "input", "interaction", "interactive", "invalid", "kattis", "language", "latest_bt", "memory", "more", "move_to", "no_bar", "no_generate", "no_solution", "no_solutions", "no_testcase_sanity_checks", 
"no_time_limit", "no_validators", "no_visualizer", "open", "order", "order_from_ccs", "overview", "password", "post_freeze", "problem", "problemname", "remove", "reorder", "samples", "sanitizer", "skel", "skip", "sort", "submissions", "table", "testcases", "time_limit", "timeout", "token", "tree", "type", "username", "valid_output", "watch", "web", "write"] +ARGS_LIST: Final[Sequence[str]] = ["1", "add", "all", "answer", "api", "author", "check_deterministic", "clean", "colors", "contest", "contest_id", "contestname", "cp", "defaults", "default_solution", "depth", "directory", "error", "force", "force_build", "generic", "input", "interaction", "interactive", "invalid", "kattis", "lang", "latest_bt", "legacy", "memory", "more", "move_to", "no_bar", "no_generate", "no_solution", "no_solutions", "no_testcase_sanity_checks", "no_time_limit", "no_validators", "no_visualizer", "number", "open", "order", "order_from_ccs", "overview", "password", "post_freeze", "problem", "problemname", "remove", "reorder", "samples", "sanitizer", "skel", "skip", "sort", "submissions", "table", "testcases", "time_limit", "timeout", "token", "tree", "type", "username", "valid_output", "watch", "web", "write"] # fmt: on diff --git a/bin/constraints.py b/bin/constraints.py index a8bdfa751..731453584 100644 --- a/bin/constraints.py +++ b/bin/constraints.py @@ -1,8 +1,11 @@ import re from collections import defaultdict +from typing import Optional +import latex import validate from colorama import Fore, Style +from problem import Problem # Local imports from util import * @@ -15,21 +18,23 @@ """ -def check_validators(problem): +def check_validators( + problem: Problem, +) -> tuple[set[int | float], list[str | tuple[int | float, str, int | float]]]: in_constraints: validate.ConstraintsDict = {} ans_constraints: validate.ConstraintsDict = {} problem.validate_data(validate.Mode.INPUT, constraints=in_constraints) if not in_constraints: warn("No constraint validation of input values found in input validators.") problem.validate_data(validate.Mode.ANSWER, constraints=ans_constraints) - if not problem.interactive and not problem.multi_pass and not ans_constraints: + if not problem.settings.ans_is_output and not ans_constraints: log("No constraint validation of answer values found in answer or output validators.") print() - validator_values = set() + validator_values: set[int | float] = set() validator_defs: list[str | tuple[int | float, str, int | float]] = [] - def f(cs): + def f(cs: validate.ConstraintsDict) -> None: for loc, value in sorted(cs.items()): name, has_low, has_high, vmin, vmax, low, high = value validator_defs.append((low, name, high)) @@ -44,12 +49,12 @@ def f(cs): return validator_values, validator_defs -def check_statement(problem, language): - statement_file = problem.path / f"problem_statement/problem.{language}.tex" +def check_statement(problem: Problem, language: str) -> tuple[set[int | float], list[str]]: + statement_file = problem.path / latex.PdfType.PROBLEM.path(language) statement = statement_file.read_text() - statement_values = set() - statement_defs = [] + statement_values: set[int | float] = set() + statement_defs: list[str] = [] defines = ["\\def", "\\newcommand"] sections = ["Input", "Output", "Interaction"] @@ -66,15 +71,16 @@ def check_statement(problem, language): } relations = re.compile(r"(<=|!=|>=|<|=|>)") - def math_eval(text): + def math_eval(text: str) -> Optional[int | float]: try: # eval is dangerous, but on the other hand we run submission code so this is fine text = 
text.replace("^", "**") - return eval(text, {"__builtin__": None}) + value = eval(text, {"__builtin__": None}) + return value if isinstance(value, (int, float)) else None except (SyntaxError, NameError, TypeError, ZeroDivisionError): return None - def constraint(text): + def constraint(text: str) -> None: # handles $$math$$ if len(text) == 0: return @@ -131,13 +137,13 @@ def constraint(text): in_io = False end = None - def matches(text): + def matches(text: str) -> bool: nonlocal pos if pos + len(text) > len(statement): return False return statement[pos : pos + len(text)] == text - def parse_group(): + def parse_group() -> str: nonlocal pos assert statement[pos] == "{" next = pos + 1 @@ -154,7 +160,7 @@ def parse_group(): pos = next return name - def parse_command(): + def parse_command() -> str: nonlocal pos assert statement[pos] == "\\" next = pos + 1 @@ -170,7 +176,7 @@ def parse_command(): # 3) if a section starts parse that (and ensure that no environment is active) # 4) if an environment begins parse that (and ensure that no other environment is active) # 5) if a new define starts parse that - # 6) if inline math starts in an input/ouput part parse it as constraint + # 6) if inline math starts in an input/output part parse it as constraint while pos < len(statement): if statement[pos] == "%": next = statement.find("\n", pos) @@ -250,16 +256,16 @@ def parse_command(): return statement_values, statement_defs -def check_constraints(problem): +def check_constraints(problem: Problem) -> bool: validator_values, validator_defs = check_validators(problem) - statement_values = defaultdict(set) - statement_defs = defaultdict(set) + statement_values: dict[int | float, set[str]] = defaultdict(set) + statement_defs: dict[str, set[str]] = defaultdict(set) for lang in problem.statement_languages: values, defs = check_statement(problem, lang) - for entry in values: - statement_values[entry].add(lang) - for entry in defs: - statement_defs[entry].add(lang) + for value_entry in values: + statement_values[value_entry].add(lang) + for def_entry in defs: + statement_defs[def_entry].add(lang) # print all the definitions. value_len = 12 diff --git a/bin/contest.py b/bin/contest.py index 0e07024f7..241301dd8 100644 --- a/bin/contest.py +++ b/bin/contest.py @@ -1,23 +1,23 @@ import config from pathlib import Path +from typing import cast, Any, Optional from util import * # Read the contest.yaml, if available -_contest_yaml = None +_contest_yaml: Optional[dict[str, Any]] = None -def contest_yaml(): +def contest_yaml() -> dict[str, Any]: global _contest_yaml if _contest_yaml is not None: return _contest_yaml - # TODO: Do we need both here? 
- for p in [Path("contest.yaml"), Path("../contest.yaml")]: - if p.is_file(): - _contest_yaml = read_yaml_settings(p) - return _contest_yaml + contest_yaml_path = Path("contest.yaml") + if contest_yaml_path.is_file(): + _contest_yaml = read_yaml_settings(contest_yaml_path) + return _contest_yaml _contest_yaml = {} return _contest_yaml @@ -25,23 +25,23 @@ def contest_yaml(): _problems_yaml = None -def problems_yaml(): +def problems_yaml() -> Optional[list[dict[str, Any]]]: global _problems_yaml - if _problems_yaml: - return _problems_yaml if _problems_yaml is False: return None + if _problems_yaml: + return _problems_yaml problemsyaml_path = Path("problems.yaml") if not problemsyaml_path.is_file(): _problems_yaml = False return None _problems_yaml = read_yaml(problemsyaml_path) - return _problems_yaml + return cast(list[dict[str, Any]], _problems_yaml) -def get_api(): - api = config.args.api or contest_yaml().get("api") +def get_api() -> str: + api = config.args.api or cast(str, contest_yaml().get("api")) if not api: fatal( "Could not find key `api` in contest.yaml and it was not specified on the command line." @@ -105,7 +105,7 @@ def call_api(method, endpoint, **kwargs): return r -def call_api_get_json(url): +def call_api_get_json(url: str): r = call_api("GET", url) r.raise_for_status() try: diff --git a/bin/export.py b/bin/export.py index 433acaf7a..fc087cfff 100644 --- a/bin/export.py +++ b/bin/export.py @@ -1,89 +1,81 @@ +import config import datetime +import re +import shutil import sys +import util import yaml -import re import zipfile -import config -import util from pathlib import Path -from typing import Optional +from typing import Any, Optional from contest import * +from latex import PdfType from problem import Problem +from validate import InputValidator, AnswerValidator, OutputValidator +from visualize import InputVisualizer, OutputVisualizer -# Replace \problemname{...} by the value of `name:` in problems.yaml in all .tex files. -# This is needed because Kattis is currently still running the legacy version of the problem spec, -# rather than 2023-07-draft. 
-def fix_problem_name_cmd(problem):
-    reverts = []
-    for f in (problem.path / "problem_statement").iterdir():
-        if f.is_file() and f.suffix == ".tex" and len(f.suffixes) >= 2:
-            lang = f.suffixes[-2][1:]
-            t = f.read_text()
-            match = re.search(r"\\problemname\{\s*(\\problemyamlname)?\s*\}", t)
-            if match:
-                if lang in problem.settings.name:
-                    reverts.append((f, t))
-                    t = t.replace(match[0], r"\problemname{" + problem.settings.name[lang] + "}")
-                    f.write_text(t)
-                else:
-                    util.error(f"{f}: no name set for language {lang}.")
-
-    def revert():
-        for f, t in reverts:
-            f.write_text(t)
-
-    return revert
-
-
-def force_single_language(problems):
-    if config.args.languages and len(config.args.languages) == 1:
-        statement_language = config.args.languages[0]
+def select_languages(problems: list[Problem]) -> list[str]:
+    if config.args.lang:
+        languages = config.args.lang
     else:
-        all_languages = set.union(*(set(p.statement_languages) for p in problems))
-        if len(all_languages) > 1:
-            fatal("Multiple languages found, please specify one with --language")
-        statement_language = all_languages.pop()
-    return statement_language
+        languages = list(set(sum((p.statement_languages for p in problems), [])))
+    languages.sort()
+    if config.args.legacy:
+        if len(languages) > 1:
+            # legacy can handle at most one language
+            fatal("Multiple languages found, please specify one with --lang")
+    if not languages:
+        fatal("No language found")
+    return languages
 
 
 # Write any .lang.pdf files to .pdf.
-def remove_language_suffix(fname, statement_language):
-    if not statement_language:
-        return fname
-    out = Path(fname)
-    if out.suffixes == ["." + statement_language, ".pdf"]:
-        out = out.with_suffix("").with_suffix(".pdf")
-    return out
+def remove_language_pdf_suffix(file: Path, lang: Optional[str]) -> Path:
+    if lang and file.name.endswith(f".{lang}.pdf"):
+        return file.with_name(file.name.removesuffix(f".{lang}.pdf") + ".pdf")
+    else:
+        return file
 
 
-def build_samples_zip(problems, output, statement_language):
+def build_samples_zip(problems: list[Problem], output: Path, languages: list[str]) -> None:
     zf = zipfile.ZipFile(output, mode="w", compression=zipfile.ZIP_DEFLATED, allowZip64=False)
 
     # Do not include contest PDF for kattis.
     if not config.args.kattis:
-        for fname in glob(Path("."), f"contest*.{statement_language}.pdf"):
-            if Path(fname).is_file():
-                zf.write(
-                    fname,
-                    remove_language_suffix(fname, statement_language),
-                    compress_type=zipfile.ZIP_DEFLATED,
-                )
+        for language in languages:
+            for file in glob(Path("."), f"contest*.{language}.pdf"):
+                out = remove_language_pdf_suffix(file, language) if config.args.legacy else file
+                if Path(file).is_file():
+                    zf.write(
+                        file,
+                        out,
+                        compress_type=zipfile.ZIP_DEFLATED,
+                    )
 
     for problem in problems:
+        if not problem.label:
+            fatal(f"Cannot create samples zip: Problem {problem.name} does not have a label!")
+
         outputdir = Path(problem.label)
 
         attachments_dir = problem.path / "attachments"
         if (problem.interactive or problem.multi_pass) and not attachments_dir.is_dir():
-            interactive = "interactive " if problem.interactive else ""
-            multi_pass = "multi-pass " if problem.multi_pass else ""
             util.error(
-                f"{interactive}{multi_pass}problem {problem.name} does not have an attachments/ directory."
+                f"{problem.settings.type_name()} problem {problem.name} does not have an attachments/ directory."
             )
             continue
 
-        empty = True
+        contents: dict[Path, Path] = {}  # Maps destination to source, to allow checking duplicates.
+
+        # Add samples. 
+ samples = problem.download_samples() + for i, (in_file, ans_file) in enumerate(samples): + base_name = outputdir / str(i + 1) + contents[base_name.with_suffix(".in")] = in_file + if ans_file.stat().st_size > 0: + contents[base_name.with_suffix(".ans")] = ans_file # Add attachments if they exist. if attachments_dir.is_dir(): @@ -91,121 +83,80 @@ def build_samples_zip(problems, output, statement_language): if f.is_dir(): util.error(f"{f} directory attachments are not yet supported.") elif f.is_file() and f.exists(): - zf.write(f, outputdir / f.name) - empty = False + destination = outputdir / f.name + if destination in contents: + util.error( + f"Cannot overwrite {destination} from attachments/" + + f" (sourced from {contents[destination]})." + + "\n\tDo not include samples in attachments/," + + " use .{in,ans}.statement or .{in,ans}.download instead." + ) + else: + contents[destination] = f else: util.error(f"Cannot include broken file {f}.") - # Add samples for non-interactive and non-multi-pass problems. - if not problem.interactive and not problem.multi_pass: - samples = problem.testcases(only_samples=True) - if samples: - for i in range(0, len(samples)): - sample = samples[i] - basename = outputdir / str(i + 1) - zf.write(sample.in_path, basename.with_suffix(".in")) - zf.write(sample.ans_path, basename.with_suffix(".ans")) - empty = False - - if empty: + if contents: + for destination, source in contents.items(): + zf.write(source, destination) + else: util.error(f"No attachments or samples found for problem {problem.name}.") zf.close() print("Wrote zip to samples.zip", file=sys.stderr) -def build_problem_zip(problem: Problem, output: Path): - """Make DOMjudge ZIP file for specified problem.""" +def build_problem_zip(problem: Problem, output: Path) -> bool: + """Make DOMjudge/Kattis ZIP file for specified problem.""" - # Add problem PDF for only one language to the zip file (note that Kattis export does not include PDF) - statement_language = None if config.args.kattis else force_single_language([problem]) + if not has_ryaml: + error("zip needs the ruamel.yaml python3 library. Install python[3]-ruamel.yaml.") + return False - deprecated = [ # may be removed at some point. - "domjudge-problem.ini", - ] + from ruamel.yaml.comments import CommentedMap - write_file_strs: list[tuple[str, str]] = [] + languages = select_languages([problem]) files = [ - ("problem_statement/*", True), + ("problem.yaml", True), + ("statement/*", True), + ("solution/*", False), + ("problem_slide/*", False), + ("generators/*", False), + (f"{InputValidator.source_dir}/**/*", True), + (f"{AnswerValidator.source_dir}/**/*", False), # TODO required when not interactive? ("submissions/accepted/**/*", True), ("submissions/*/**/*", False), ("attachments/**/*", problem.interactive or problem.multi_pass), + (f"{InputVisualizer.source_dir}/**/*", False), + (f"{OutputVisualizer.source_dir}/**/*", False), ] - testcases = [ - ("data/secret/**/*.in", True), - ("data/sample/**/*.in", not problem.interactive and not problem.multi_pass), - ] - - if problem.interactive or problem.multi_pass: - # .interaction files don't need a corresponding .in - # therefore we can handle them like all other files - files += [("data/sample/**/*.interaction", False)] - + # Do not include PDFs for kattis. 
if not config.args.kattis: - files += [ - (f"problem.{statement_language}.pdf", True), - ("data/sample/**/*.in.statement", False), - ("data/sample/**/*.ans.statement", False), - ] + for language in languages: + files.append((PdfType.PROBLEM.path(language, ".pdf").name, True)) + files.append((PdfType.PROBLEM_SLIDE.path(language, ".pdf").name, False)) + files.append((PdfType.SOLUTION.path(language, ".pdf").name, False)) if problem.custom_output: - files.append(("output_validators/**/*", True)) - - if config.args.kattis: - files.append(("input_validators/**/*", True)) + files.append((f"{OutputValidator.source_dir}/**/*", True)) - print("Preparing to make ZIP file for problem dir %s" % problem.path, file=sys.stderr) + message("preparing zip file content", "Zip", problem.path, color_type=MessageType.LOG) - # DOMjudge does not support 'type' in problem.yaml nor 'output_validator_args' in testdata.yaml yet. - # TODO: Remove this once it does. - problem_yaml_str = (problem.path / "problem.yaml").read_text() - if not config.args.kattis and not problem.settings.is_legacy(): - validator_flags = " ".join( - problem.get_testdata_yaml( - problem.path / "data", - "output_validator_args", - PrintBar("Getting validator_flags for legacy DOMjudge export"), - ) - ) - if validator_flags: - validator_flags = "validator_flags: " + validator_flags + "\n" - write_file_strs.append( - ( - "problem.yaml", - f"""{problem_yaml_str}\nvalidation: { - "custom interactive" - if problem.interactive - else "custom multi-pass" - if problem.multi_pass - else "custom" - if problem.custom_output - else "default" - }\n{validator_flags}""", - ) - ) - else: - write_file_strs.append(("problem.yaml", problem_yaml_str)) - - # DOMjudge does not support 'limits.time_limit' in problem.yaml yet. - # TODO: Remove this once it does. - if not config.args.kattis: - write_file_strs.append((".timelimit", str(problem.limits.time_limit))) - - # Warn for all deprecated files but still add them to the files list - for pattern in deprecated: - files.append((pattern, False)) - # Only include hidden files if the pattern starts with a '.'. - paths = list(util.glob(problem.path, pattern, include_hidden=pattern[0] == ".")) - if len(paths) > 0: - addition = "" - if len(paths) > 1: - addition = f" and {len(paths) - 1} more" - util.warn(f'Found deprecated file "{paths[0]}"{addition}.') + # prepare files inside dir + export_dir = problem.tmpdir / "export" + if export_dir.exists(): + shutil.rmtree(export_dir) + # For Kattis / draft spec, prepend the problem shortname to all files. + if config.args.kattis or not config.args.legacy: + export_dir /= problem.name + export_dir.mkdir(parents=True, exist_ok=True) - # Build list of files to store in ZIP file. - copyfiles = set() + def add_file(path: Path, source: Path) -> None: + path = export_dir / path + path.parent.mkdir(parents=True, exist_ok=True) + ensure_symlink(path, source) # Include all files beside testcases for pattern, required in files: @@ -214,55 +165,220 @@ def build_problem_zip(problem: Problem, output: Path): if required and len(paths) == 0: util.error(f"No matches for required path {pattern}.") for f in paths: - # NOTE: Directories are skipped because ZIP only supports files. if f.is_file(): - out = f.relative_to(problem.path) - out = remove_language_suffix(out, statement_language) - # For Kattis, prepend the problem shortname to all files. 
- if config.args.kattis: - out = problem.name / out - copyfiles.add((f, out)) - - # Include all testcases (specified by a .in file) and copy all related files - for pattern, required in testcases: - paths = list(util.glob(problem.path, pattern)) - if required and len(paths) == 0: - util.error(f"No matches for required path {pattern}.") - for f in paths: - # NOTE: Directories are skipped because ZIP only supports files. + add_file(f.relative_to(problem.path), f) + + def add_testcase(in_file: Path) -> None: + base_name = util.drop_suffix(in_file, [".in", ".in.statement", ".in.download"]) + for ext in config.KNOWN_DATA_EXTENSIONS: + f = base_name.with_suffix(ext) if f.is_file(): - if not f.with_suffix(".ans").is_file(): - util.warn(f"No answer file found for {f}, skipping.") - else: - for ext in config.KNOWN_DATA_EXTENSIONS: - f2 = f.with_suffix(ext) - if f2.is_file(): - out = f2.relative_to(problem.path) - # For Kattis, prepend the problem shortname to all files. - if config.args.kattis: - out = problem.name / out - copyfiles.add((f2, out)) + add_file(f.relative_to(problem.path), f) + + # Include all sample test cases and copy all related files. + samples = problem.download_samples() + if len(samples) == 0: + util.error("No samples found.") + for in_file, _ in samples: + add_testcase(in_file) + + # Include all secret test cases and copy all related files. + pattern = "data/secret/**/*.in" + paths = util.glob(problem.path, pattern) + if len(paths) == 0: + util.error(f"No secret test cases found in {pattern}.") + for f in paths: + if f.is_file(): + if f.with_suffix(".ans").is_file(): + add_testcase(f) + else: + util.warn(f"No answer file found for {f}, skipping.") + + # handle languages (files and yaml have to be in sync) + yaml_path = export_dir / "problem.yaml" + yaml_data = read_yaml(yaml_path) + yaml_data["name"] = CommentedMap( + {language: problem.settings.name[language] for language in languages} + ) + for type in PdfType: + for file in export_dir.glob(str(type.path("*"))): + if file.suffixes[-2][1:] not in languages: + file.unlink() - # Build .ZIP file. - print("writing ZIP file:", output, file=sys.stderr) + # drop explicit timelimit for kattis + if config.args.kattis: + if "limits" in yaml_data and "time_limit" in yaml_data["limits"]: + ryaml_filter(yaml_data["limits"], "time_limit") + + # substitute constants. + if problem.settings.constants: + constants_supported = [ + "data/**/test_group.yaml", + f"{InputValidator.source_dir}/**/*", + f"{AnswerValidator.source_dir}/**/*", + f"{OutputValidator.source_dir}/**/*", + # "statement/*", "solution/*", "problem_slide/*", use \constant{} commands + # "submissions/*/**/*", removed support? 
+ f"{InputVisualizer.source_dir}/**/*", + f"{OutputVisualizer.source_dir}/**/*", + ] + for pattern in constants_supported: + for f in export_dir.glob(pattern): + if f.is_file() and util.has_substitute(f, config.CONSTANT_SUBSTITUTE_REGEX): + text = f.read_text() + text = util.substitute( + text, + problem.settings.constants, + pattern=config.CONSTANT_SUBSTITUTE_REGEX, + bar=util.PrintBar("Zip"), + ) + f.unlink() + f.write_text(text) + + # move pdfs + if config.args.legacy and languages: + for type in PdfType: + file = export_dir / type.path(languages[0], ".pdf").name + if file.exists(): + file.rename(remove_language_pdf_suffix(file, languages[0])) + else: + for language in languages: + for type in PdfType: + path = type.path(language, ".pdf") + file = export_dir / path.name + out = export_dir / path + if not file.exists(): + continue + if out.exists(): + util.warn(f"can't add {path} (already exists).") + file.unlink() + continue + out.parent.mkdir(parents=True, exist_ok=True) + file.rename(out) + + # downgrade some parts of the problem to be more legacy like + if config.args.legacy: + # drop format version -> legacy + if "problem_format_version" in yaml_data: + ryaml_filter(yaml_data, "problem_format_version") + # type -> validation + if "type" in yaml_data: + ryaml_filter(yaml_data, "type") + validation = [] + if problem.custom_output: + validation.append("custom") + if problem.interactive: + validation.append("interactive") + if problem.multi_pass: + validation.append("multi-pass") + else: + validation.append("default") + yaml_data["validation"] = " ".join(validation) + # credits -> author + if "credits" in yaml_data: + ryaml_filter(yaml_data, "credits") + if problem.settings.credits.authors: + yaml_data["author"] = ", ".join(p.name for p in problem.settings.credits.authors) + # change source: + if problem.settings.source: + if len(problem.settings.source) > 1: + util.warn(f"Found multiple sources, using '{problem.settings.source[0].name}'.") + yaml_data["source"] = problem.settings.source[0].name + yaml_data["source_url"] = problem.settings.source[0].url + # limits.time_multipliers -> time_multiplier / time_safety_margin + if "limits" not in yaml_data or not yaml_data["limits"]: + yaml_data["limits"] = CommentedMap() + limits = yaml_data["limits"] + if "time_multipliers" in limits: + ryaml_filter(limits, "time_multipliers") + limits["time_multiplier"] = problem.limits.ac_to_time_limit + limits["time_safety_margin"] = problem.limits.time_limit_to_tle + # drop explicit timelimit + if "time_limit" in limits: + ryaml_filter(limits, "time_limit") + # validator_flags + validator_flags = " ".join( + problem.get_test_case_yaml( + problem.path / "data", + OutputValidator.args_key, + PrintBar("Getting validator_flags for legacy export"), + ) + ) + if validator_flags: + yaml_data["validator_flags"] = validator_flags + + # handle time limit + if not config.args.kattis: + (export_dir / ".timelimit").write_text(str(problem.limits.time_limit)) + + # Replace \problemname{...} by the value of `name:` in problems.yaml in all .tex files. 
+ for f in (export_dir / "statement").iterdir(): + if f.is_file() and f.suffix == ".tex" and len(f.suffixes) >= 2: + lang = f.suffixes[-2][1:] + t = f.read_text() + match = re.search(r"\\problemname\{\s*(\\problemyamlname)?\s*\}", t) + if match: + if lang in problem.settings.name: + t = t.replace(match[0], rf"\problemname{{{problem.settings.name[lang]}}}") + f.unlink() + f.write_text(t) + else: + util.error(f"{f}: no name set for language {lang}.") + + # rename statement dirs + if (export_dir / "statement").exists(): + (export_dir / "statement").rename(export_dir / "problem_statement") + for d in ["solution", "problem_slide"]: + if not (export_dir / d).is_dir(): + continue + for f in list(util.glob(problem.path, f"{d}/*")): + if f.is_file(): + out = Path("problem_statement") / f.relative_to(problem.path / d) + if out.exists(): + message( + f"Can not export {f.relative_to(problem.path)} as {out}", + "Zip", + output, + color_type=MessageType.WARN, + ) + else: + add_file(out, f) + shutil.rmtree(export_dir / d) + + # rename output_validator dir + if (export_dir / OutputValidator.source_dir).exists(): + (export_dir / "output_validators").mkdir(parents=True) + (export_dir / OutputValidator.source_dir).rename( + export_dir / "output_validators" / OutputValidator.source_dir + ) - revert_problem_name_cmd = fix_problem_name_cmd(problem) + # rename test_group.yaml back to testdata.yaml + for f in (export_dir / "data").rglob("test_group.yaml"): + f.rename(f.with_name("testdata.yaml")) + # TODO potentially, some keys also need to be renamed, but we don't use this often enough for this to matter (I hope) + # handle yaml updates + yaml_path.unlink() + write_yaml(yaml_data, yaml_path) + + # Build .ZIP file. + message("writing zip file", "Zip", output, color_type=MessageType.LOG) try: zf = zipfile.ZipFile(output, mode="w", compression=zipfile.ZIP_DEFLATED, allowZip64=False) - for source, target in sorted(copyfiles): - zf.write(source, target, compress_type=zipfile.ZIP_DEFLATED) - for target_file, content in sorted(write_file_strs): - zf.writestr(target_file, content, compress_type=zipfile.ZIP_DEFLATED) + export_dir = problem.tmpdir / "export" + for f in sorted(export_dir.rglob("*")): + # NOTE: Directories are skipped because ZIP only supports files. + if f.is_file(): + name = f.relative_to(export_dir) + zf.write(f, name, compress_type=zipfile.ZIP_DEFLATED) # Done. zf.close() - print("done", file=sys.stderr) + message("done", "Zip", color_type=MessageType.LOG) print(file=sys.stderr) - - finally: - revert_problem_name_cmd() + except Exception: + return False return True @@ -270,8 +386,15 @@ def build_problem_zip(problem: Problem, output: Path): # Assumes the current working directory has: the zipfiles and # contest*.{lang}.pdf # solutions*.{lang}.pdf +# problem-slides*.{lang}.pdf # Output is -def build_contest_zip(problems, zipfiles, outfile, statement_language): +def build_contest_zip( + problems: list[Problem], zipfiles: list[Path], outfile: str, languages: list[str] +) -> None: + if not has_ryaml: + error("zip needs the ruamel.yaml python3 library. Install python[3]-ruamel.yaml.") + return + print(f"writing ZIP file {outfile}", file=sys.stderr) if not config.args.kattis: # Kattis does not use problems.yaml. @@ -285,25 +408,28 @@ def build_contest_zip(problems, zipfiles, outfile, statement_language): # For general zip export, also create pdfs and a samples zip. 
if not config.args.kattis: sampleout = Path("samples.zip") - build_samples_zip(problems, sampleout, statement_language) + build_samples_zip(problems, sampleout, languages) - for fname in ( - [ - "problems.yaml", - "contest.yaml", - sampleout, - ] - + list(Path(".").glob(f"contest*.{statement_language}.pdf")) - + list(Path(".").glob(f"solutions*.{statement_language}.pdf")) - + list(Path(".").glob(f"problem-slides*.{statement_language}.pdf")) - ): - if Path(fname).is_file(): + def add_file(file: Path) -> None: + if file.is_file(): + out = remove_language_pdf_suffix(file, language) if config.args.legacy else file zf.write( - fname, - remove_language_suffix(fname, statement_language), + file, + out, compress_type=zipfile.ZIP_DEFLATED, ) + add_file(Path("problems.yaml")) + add_file(Path("contest.yaml")) + add_file(sampleout) + for language in languages: + for name in [ + *Path(".").glob(f"contest*.{language}.pdf"), + *Path(".").glob(f"solutions*.{language}.pdf"), + *Path(".").glob(f"problem-slides*.{language}.pdf"), + ]: + add_file(name) + # For Kattis export, delete the original zipfiles. if config.args.kattis: for fname in zipfiles: @@ -315,7 +441,7 @@ def build_contest_zip(problems, zipfiles, outfile, statement_language): zf.close() -def update_contest_id(cid): +def update_contest_id(cid: str) -> None: if has_ryaml: contest_yaml_path = Path("contest.yaml") data = read_yaml(contest_yaml_path) @@ -325,7 +451,7 @@ def update_contest_id(cid): error("ruamel.yaml library not found. Update the id manually.") -def export_contest(cid: Optional[str]): +def export_contest(cid: Optional[str]) -> str: data = contest_yaml() if not data: @@ -365,16 +491,14 @@ def export_contest(cid: Optional[str]): new_cid = yaml.load(r.text, Loader=yaml.SafeLoader) log(f"Uploaded the contest to contest_id {new_cid}.") if new_cid != cid: - log("Update contest_id in contest.yaml automatically? [Y/n]") - a = input().lower() - if a == "" or a[0] == "y": + if ask_variable_bool("Update contest_id in contest.yaml automatically"): update_contest_id(new_cid) log(f"Updated contest_id to {new_cid}") return new_cid -def update_problems_yaml(problems, colors=None): +def update_problems_yaml(problems: list[Problem], colors: Optional[list[str]] = None) -> None: # Update name and time limit values. if not has_ryaml: log( @@ -386,16 +510,14 @@ def update_problems_yaml(problems, colors=None): path = Path("problems.yaml") data = path.is_file() and read_yaml(path) or [] - # DOMjudge does not yet support multilingual problems.yaml files. - statement_language = force_single_language(problems) - change = False for problem in problems: found = False - problem_name = problem.settings.name - if isinstance(problem_name, dict): - problem_name = problem_name[statement_language] + # ProblemSettings always has `name: dict[str, str]`, but we revert to `str` when `--legacy` is used. 
+ problem_name: str | dict[str, str] = problem.settings.name + if isinstance(problem_name, dict) and config.args.legacy: + problem_name = problem_name[select_languages(problems)[0]] for d in data: if d["id"] == problem.name: @@ -443,18 +565,24 @@ def update_problems_yaml(problems, colors=None): if data != sorted_data: change = True data = sorted_data - label = "X" if contest_yaml().get("testsession") else "A" + label = "X" if contest_yaml().get("test_session") else "A" for d in data: d["label"] = label label = inc_label(label) + if config.args.number: + n = 0 + for d in data: + n += 1 + newlabel = f"S{n:>02}" + if d["label"] != newlabel: + d["label"] = newlabel + change = True + if change: - if config.args.action in ["update_problems_yaml"]: - a = "y" - else: - log("Update problems.yaml with latest values? [Y/n]") - a = input().lower() - if a == "" or a[0] == "y": + if config.args.action in ["update_problems_yaml"] or ask_variable_bool( + "Update problems.yaml with latest values" + ): write_yaml(data, path) log("Updated problems.yaml") else: @@ -462,16 +590,15 @@ def update_problems_yaml(problems, colors=None): log("Already up to date") -def export_problems(problems, cid): +def export_problems(problems: list[Problem], cid: str) -> Any: if not contest_yaml(): fatal("Exporting a contest only works if contest.yaml is available and not empty.") update_problems_yaml(problems) # Uploading problems.yaml - with open("problems.yaml", "r") as file: - data = "".join(file.readlines()) verbose("Uploading problems.yaml:") + data = Path("problems.yaml").read_text() verbose(data) r = call_api( "POST", @@ -494,7 +621,7 @@ def export_problems(problems, cid): # Export a single problem to the specified contest ID. -def export_problem(problem, cid, pid): +def export_problem(problem: Problem, cid: str, pid: Optional[str]) -> None: if pid: log(f"Export {problem.name} to id {pid}") else: @@ -524,7 +651,7 @@ def export_problem(problem, cid, pid): # Export the contest and individual problems to DOMjudge. # Mimicked from https://github.com/DOMjudge/domjudge/blob/main/misc-tools/import-contest.sh -def export_contest_and_problems(problems, statement_language): +def export_contest_and_problems(problems: list[Problem], languages: list[str]) -> None: if config.args.contest_id: cid = config.args.contest_id else: @@ -534,7 +661,11 @@ def export_contest_and_problems(problems, statement_language): if not any(contest["id"] == cid for contest in get_contests()): cid = export_contest(cid) - with open(f"contest.{statement_language}.pdf", "rb") as pdf_file: + if len(languages) != 1: + # TODO: fix this + fatal("DOMjudge does not yet support multiple languages") + + with open(f"contest.{languages[0]}.pdf", "rb") as pdf_file: r = call_api( "POST", f"/contests/{cid}/problemset", @@ -555,24 +686,23 @@ def export_contest_and_problems(problems, statement_language): check_if_user_has_team() - def get_problem_id(problem): + def get_problem_id(problem: Problem) -> Optional[str]: nonlocal ccs_problems for p in ccs_problems: if problem.name in [p.get("short_name"), p.get("id"), p.get("externalid")]: return p["id"] + return None for problem in problems: pid = get_problem_id(problem) export_problem(problem, cid, pid) -def check_if_user_has_team(): +def check_if_user_has_team() -> None: # Not using the /users/{uid} route, because {uid} is either numeric or a string depending on the DOMjudge config. 
     users = call_api_get_json("/users")
     if not any(user["username"] == config.args.username and user["team"] for user in users):
         warn(f'User "{config.args.username}" is not associated with a team.')
         warn("Therefore, the jury submissions will not be run by the judgehosts.")
-        log("Continue export to DOMjudge? [N/y]")
-        a = input().lower()
-        if not a or a[0] != "y":
+        if not ask_variable_bool("Continue export to DOMjudge", False):
             fatal("Aborted.")
diff --git a/bin/fuzz.py b/bin/fuzz.py
index 6faf18a7c..ac1f86241 100644
--- a/bin/fuzz.py
+++ b/bin/fuzz.py
@@ -1,13 +1,19 @@
 import config
 import problem
-import run
 import random
 import generate
+import shutil
+import signal
+import sys
 import time
 import threading
+from colorama import Style
+from pathlib import Path
+from typing import Any, Optional, TextIO
 
 import parallel
 from util import *
+from run import Run, Submission
 from testcase import Testcase
 from validate import OutputValidator, Mode
 from verdicts import Verdict
@@ -22,9 +28,12 @@
 
 
 class GeneratorTask:
-    def __init__(self, fuzz: "Fuzz", t, i, tmp_id):
+    def __init__(self, fuzz: "Fuzz", t: generate.TestcaseRule, i: int, tmp_id: int):
         self.fuzz = fuzz
-        self.generator = t.generator
+        self.rule = t
+        generator = t.generator
+        assert generator is not None
+        self.generator = generator
         self.solution = t.config.solution
         self.i = i
         self.tmp_id = tmp_id
@@ -37,16 +46,17 @@ def __init__(self, fuzz: "Fuzz", t, i, tmp_id):
         self.save_mutex = threading.Lock()
         self.saved = False
 
-    def run(self, bar):
+    def run(self, bar: ProgressBar) -> None:
         if self._run(bar):
             self.fuzz.finish_task(self.tmp_id)
         else:
             self.fuzz.finish_task(self.tmp_id, 1 + len(self.fuzz.submissions))
 
-    def _run(self, bar):
+    def _run(self, bar: ProgressBar) -> bool:
         # GENERATE THE TEST DATA
         dir = Path("fuzz") / f"tmp_id_{str(self.tmp_id)}"
         cwd = self.fuzz.problem.tmpdir / "tool_runs" / dir
+        shutil.rmtree(cwd, ignore_errors=True)
         cwd.mkdir(parents=True, exist_ok=True)
         name = "testcase"
         infile = cwd / (name + ".in")
@@ -58,8 +68,11 @@ def _run(self, bar):
         localbar = bar.start(f"{self.i}: generate")
         result = self.generator.run(localbar, cwd, name, self.seed)
+        self.fuzz.queue.ensure_alive()
         if not result.status:
             return False  # No need to call bar.done() in this case, because the Generator calls bar.error()
+        if ".ans" in self.rule.hardcoded:
+            ansfile.write_text(self.rule.hardcoded[".ans"])
         localbar.done()
 
         testcase = Testcase(self.fuzz.problem, infile, short_path=dir / (name + ".in"))
@@ -67,34 +80,38 @@ def _run(self, bar):
         # Validate the generated .in.
         localbar = bar.start(f"{self.i}: validate input")
         if not testcase.validate_format(Mode.INPUT, bar=localbar, constraints=None):
+            self.fuzz.queue.ensure_alive()
             localbar.done(False)
             return False
+        self.fuzz.queue.ensure_alive()
         localbar.done()
 
         # Generate .ans.
-        if not self.fuzz.problem.interactive and not self.fuzz.problem.multi_pass:
-            if self.solution and not testcase.ans_path.is_file():
-                if testcase.ans_path.is_file():
-                    testcase.ans_path.unlink()
-                # Run the solution and validate the generated .ans.
-                localbar = bar.start(f"{self.i}: generate ans")
-                if not self.solution.run(bar, cwd).status:
+        if not ansfile.is_file():
+            if self.fuzz.problem.settings.ans_is_output:
+                if self.solution:
+                    # Run the solution and validate the generated .ans. 
+ localbar = bar.start(f"{self.i}: generate ans") + if not self.solution.run(bar, cwd).status: + self.fuzz.queue.ensure_alive() + localbar.done() + return False + self.fuzz.queue.ensure_alive() localbar.done() - return False - localbar.done() - - if ansfile.is_file(): - localbar = bar.start(f"{self.i}: validate output") - if not testcase.validate_format(Mode.ANSWER, bar=localbar): - localbar.done(False) - return False - localbar.done() - else: - bar.error(f"{self.i}: {ansfile.name} was not generated.") + elif self.fuzz.problem.interactive or self.fuzz.problem.multi_pass: + ansfile.write_text("") + + if ansfile.is_file(): + localbar = bar.start(f"{self.i}: validate output") + if not testcase.validate_format(Mode.ANSWER, bar=localbar): + self.fuzz.queue.ensure_alive() + localbar.done(False) return False + self.fuzz.queue.ensure_alive() + localbar.done() else: - if not testcase.ans_path.is_file(): - testcase.ans_path.write_text("") + bar.error(f"{self.i}: {ansfile.name} was not generated.") + return False # Run all submissions against the testcase. with self.fuzz.queue: @@ -102,7 +119,13 @@ def _run(self, bar): self.fuzz.queue.put(SubmissionTask(self, submission, testcase, self.tmp_id)) return True - def save_test(self, bar): + def get_command(self) -> dict[str, str] | str: + if not self.fuzz.problem.settings.ans_is_output and ".ans" in self.rule.hardcoded: + return {"generate": self.command, "ans": self.rule.hardcoded[".ans"]} + else: + return self.command + + def save_test(self, bar: ProgressBar, submission: Submission, verdict: Verdict) -> None: if self.saved: return save = False @@ -111,57 +134,95 @@ def save_test(self, bar): if not self.saved: self.saved = True save = True + self.fuzz.queue.ensure_alive() # only save rule if we set self.saved to True - if save and not self.fuzz.queue.aborted: + if save: localbar = bar.start(f"{self.i}: {self.command}") localbar.log("Saving testcase in generators.yaml.") + self.fuzz.save_test(self.get_command(), submission, verdict) + self.fuzz.queue.ensure_alive() localbar.done() - self.fuzz.save_test(self.command) class SubmissionTask: - def __init__(self, generator_task, submission, testcase, tmp_id): + def __init__( + self, + generator_task: GeneratorTask, + submission: Submission, + testcase: Testcase, + tmp_id: int, + ): self.generator_task = generator_task self.submission = submission self.testcase = testcase self.tmp_id = tmp_id - def run(self, bar): + def run(self, bar: ProgressBar) -> None: self._run(bar) self.generator_task.fuzz.finish_task(self.tmp_id) - def _run(self, bar): - r = run.Run(self.generator_task.fuzz.problem, self.submission, self.testcase) + def _run(self, bar: ProgressBar) -> None: + r = Run(self.generator_task.fuzz.problem, self.submission, self.testcase) localbar = bar.start(f"{self.generator_task.i}: {self.submission.name}") result = r.run(localbar) + self.generator_task.fuzz.queue.ensure_alive() if result.verdict != Verdict.ACCEPTED: - self.generator_task.save_test(bar) + self.generator_task.save_test(bar, self.submission, result.verdict) localbar.done(False, f"{result.verdict}!") else: localbar.done() +class FuzzProgressBar(ProgressBar): + def __init__(self, queue: parallel.AbstractQueue, prefix: str, max_len: int): + super().__init__(prefix, max_len) + self.queue = queue + + def _print( + self, + *objects, + sep: str = "", + end: str = "\n", + file: TextIO = sys.stderr, + flush: bool = True, + ): + self.queue.ensure_alive() + super()._print(*objects, sep=sep, end=end, file=file, flush=flush) + + class Fuzz: def 
__init__(self, problem: problem.Problem): self.generators_yaml_mutex = threading.Lock() self.problem = problem + self.summary: dict[Submission, set[Verdict]] = {} + self.added = 0 # GENERATOR INVOCATIONS generator_config = generate.GeneratorConfig(self.problem, config.args.testcases) self.testcase_rules: list[generate.TestcaseRule] = [] # Filter to only keep valid rules depending on seed without duplicates from count - added_testcase_rules = set() + added_testcase_rule_data = set() - def add_testcase(t): + def add_testcase(t: generate.TestcaseRule) -> None: if ( - t.in_is_generated - and t.parse_error is None - and t.generator.uses_seed - and t.generator.command_string.strip() not in added_testcase_rules + not t.in_is_generated + or t.root in config.INVALID_CASE_DIRECTORIES + or t.parse_error is not None + or t.generator is None + or not t.generator.uses_seed ): - self.testcase_rules.append(t) - added_testcase_rules.add(t.generator.command_string.strip()) + return + + testcase_rule_data = t.generator.command_string.strip() + if not problem.settings.ans_is_output and ".ans" in t.hardcoded: + testcase_rule_data += t.hardcoded[".ans"] + + if testcase_rule_data in added_testcase_rule_data: + return + + self.testcase_rules.append(t) + added_testcase_rule_data.add(testcase_rule_data) generator_config.root_dir.walk(add_testcase, dir_f=None) if len(self.testcase_rules) == 0: @@ -175,7 +236,7 @@ def add_testcase(t): # SUBMISSIONS self.submissions = self.problem.selected_or_accepted_submissions() - def run(self): + def run(self) -> bool: if not has_ryaml: error("Fuzzing needs the ruamel.yaml python3 library. Install python[3]-ruamel.yaml.") return False @@ -190,36 +251,45 @@ def run(self): message("Press CTRL+C to stop\n", "Fuzz", color_type=MessageType.LOG) - def runner(task: GeneratorTask): + def runner(task: GeneratorTask | SubmissionTask) -> None: task.run(bar) - # config.args.no_bar = True - # max(len(s.name) for s in self.submissions) - bar = ProgressBar("Fuzz", max_len=60) self.start_time = time.monotonic() self.iteration = 0 self.tasks = 0 self.queue = parallel.new_queue(runner, pin=True) - def soft_exit(sig, frame): + # pool of ids used for generators + self.tmp_ids = 2 * max(1, self.queue.num_threads) + 1 + self.free_tmp_id = {*range(self.tmp_ids)} + self.tmp_id_count = [0] * self.tmp_ids + + max_len = max( + 25, + *[len(s.name) for s in self.submissions], + *[ + len(t.generator.cache_command(seed=2**32)) + for t in self.testcase_rules + if t.generator is not None + ], + ) + max_len += len(f"{self.tmp_ids}: ") + bar = FuzzProgressBar(self.queue, "Fuzz", max_len=max_len) + + def soft_exit(sig: Any, frame: Any) -> None: if self.queue.aborted: fatal("Running interrupted", force=True) else: self.queue.abort() with bar: - bar.clearline() + print(bar.carriage_return, file=sys.stderr) message( "Running interrupted (waiting on remaining tasks)\n", "\nFuzz", color_type=MessageType.ERROR, ) - signal.signal(signal.SIGINT, soft_exit) - - # pool of ids used for generators - self.tmp_ids = 2 * max(1, self.queue.num_threads) + 1 - self.free_tmp_id = {*range(self.tmp_ids)} - self.tmp_id_count = [0] * self.tmp_ids + old_handler = signal.signal(signal.SIGINT, soft_exit) # add first generator task self.finish_task() @@ -229,16 +299,24 @@ def soft_exit(sig, frame): # At this point, no new tasks may be started anymore. 
self.queue.done() + signal.signal(signal.SIGINT, old_handler) + + for submission, verdicts in self.summary.items(): + msg = ", ".join(f"{v.color()}{v.short()}{Style.RESET_ALL}" for v in sorted(verdicts)) + message(msg, "Fuzz", submission.name) + message(f"Found {self.added} testcases in total.", "Fuzz") + if self.queue.aborted: - fatal("Running interrupted", force=True) + fatal("Running interrupted") bar.done() bar.finalize() + return True # finish task from generator with tmp_id # also add new tasks if queue becomes too empty - def finish_task(self, tmp_id=None, count=1): + def finish_task(self, tmp_id: Optional[int] = None, count: int = 1) -> None: with self.queue: # return tmp_id (and reuse it if all submissions are finished) if tmp_id is not None: @@ -257,18 +335,20 @@ def finish_task(self, tmp_id=None, count=1): self.iteration += 1 # 1 new generator tasks which will also create one task per submission new_tasks = 1 + len(self.submissions) - tmp_id = min(self.free_tmp_id) - self.free_tmp_id.remove(tmp_id) - self.tmp_id_count[tmp_id] = new_tasks + new_tmp_id = min(self.free_tmp_id) + self.free_tmp_id.remove(new_tmp_id) + self.tmp_id_count[new_tmp_id] = new_tasks self.tasks += new_tasks self.queue.put( - GeneratorTask(self, testcase_rule, self.iteration, tmp_id), + GeneratorTask(self, testcase_rule, self.iteration, new_tmp_id), priority=1, ) # Write new rule to yaml # lock between read and write to ensure that no rule gets lost - def save_test(self, command): + def save_test( + self, command: dict[str, str] | str, submission: Submission, verdict: Verdict + ) -> None: with self.generators_yaml_mutex: generators_yaml = self.problem.path / "generators/generators.yaml" data = None @@ -286,3 +366,6 @@ def save_test(self, command): # Overwrite generators.yaml. write_yaml(data, generators_yaml) + + self.summary.setdefault(submission, set()).add(verdict) + self.added += 1 diff --git a/bin/generate.py b/bin/generate.py index 06dd81d00..814fa97ad 100644 --- a/bin/generate.py +++ b/bin/generate.py @@ -1,19 +1,22 @@ +import collections import random import re -import shutil -import collections import secrets +import shutil +import sys +import time from collections.abc import Callable, Sequence from colorama import Fore, Style from pathlib import Path, PurePosixPath -from typing import Final, overload +from typing import Any, Final, Iterable, Optional, overload import config import parallel import program import run import validate +import visualize from testcase import Testcase from verdicts import Verdict from problem import Problem @@ -86,7 +89,6 @@ def resolve_path(path, *, allow_absolute, allow_relative): # The following classes inherit from Invocation: # - GeneratorInvocation # - SolutionInvocation -# - VisualizerInvocation class Invocation: SEED_REGEX: Final[re.Pattern[str]] = re.compile(r"\{seed(:[0-9]+)?\}") NAME_REGEX: Final[re.Pattern[str]] = re.compile(r"\{name\}") @@ -119,7 +121,7 @@ def __init__(self, problem: Problem, string: str, *, allow_absolute: bool, allow raise ParseException("{seed(:[0-9]+)} may appear at most once.") # Automatically set self.program when that program has been built. 
- self.program: Optional[program.Generator | program.Visualizer | run.Submission] = None + self.program: Optional[program.Generator | run.Submission] = None def callback(program): self.program = program @@ -170,7 +172,7 @@ def run(self, bar, cwd, name, seed, retries=1): ) if result.status: break - if not result.retry: + if result.status == ExecStatus.TIMEOUT: break if not result.status: @@ -187,30 +189,6 @@ def run(self, bar, cwd, name, seed, retries=1): return result -class VisualizerInvocation(Invocation): - def __init__(self, problem, string): - super().__init__(problem, string, allow_absolute=True, allow_relative=False) - - # Run the visualizer, taking {name} as a command line argument. - # Stdin and stdout are not used. - # {name} is no longer used and hardcoded to `testcase` (see #273), and {seed} is also not used. - def run(self, bar, cwd): - assert isinstance(self.program, program.Visualizer), "Visualizer program must be built!" - - result = self.program.run(cwd, args=self._sub_args()) - - if result.status == ExecStatus.TIMEOUT: - bar.debug(f"{Style.RESET_ALL}-> {shorten_path(self.problem, cwd)}") - bar.error(f"Visualizer TIMEOUT after {result.duration}s") - elif not result.status: - bar.debug(f"{Style.RESET_ALL}-> {shorten_path(self.problem, cwd)}") - bar.error("Visualizer failed", result.err) - - if result.status and config.args.error and result.err: - bar.log("stderr", result.err) - return result - - class SolutionInvocation(Invocation): def __init__(self, problem, string): super().__init__(problem, string, allow_absolute=True, allow_relative=False) @@ -239,11 +217,10 @@ def run(self, bar, cwd): bar.log("stderr", result.err) return result - def run_interaction(self, bar, cwd, t): + def generate_interaction(self, bar, cwd, t): in_path = cwd / "testcase.in" interaction_path = cwd / "testcase.interaction" - if interaction_path.is_file(): - return True + interaction_path.unlink(missing_ok=True) testcase = Testcase(self.problem, in_path, short_path=(t.path.parent / (t.name + ".in"))) assert isinstance(self.program, run.Submission) @@ -268,7 +245,7 @@ def default_solution_path(generator_config): if config.args.default_solution: if generator_config.has_yaml: message( - f"""--default-solution Ignored. Set the default solution in the generator.yaml! + f"""--default-solution Ignored. Set the default solution in the generators.yaml! solution: /{config.args.default_solution}""", "generators.yaml", color_type=MessageType.WARN, @@ -297,7 +274,7 @@ def default_solution_path(generator_config): raw = f"solution: /{solution.relative_to(problem.path)}\n" + raw yaml_path.write_text(raw) message( - f"No solution specified. {solution_short_path} added as default solution in the generator.yaml", + f"No solution specified. 
{solution_short_path} added as default solution in the generators.yaml", "generators.yaml", color_type=MessageType.LOG, ) @@ -325,30 +302,27 @@ def __init__(self, generator_config): "generate", "copy", "solution", - "visualizer", "random_salt", "retries", "count", ] + [e[1:] for e in config.KNOWN_TEXT_DATA_EXTENSIONS] -RESERVED_TESTCASE_KEYS: Final[Sequence[str]] = ["data", "testdata.yaml", "include"] +RESERVED_TESTCASE_KEYS: Final[Sequence[str]] = ["data", "test_group.yaml", "include"] KNOWN_DIRECTORY_KEYS: Final[Sequence[str]] = [ "type", "data", - "testdata.yaml", + "test_group.yaml", "include", "solution", - "visualizer", "random_salt", "retries", ] RESERVED_DIRECTORY_KEYS: Final[Sequence[str]] = ["command"] KNOWN_ROOT_KEYS: Final[Sequence[str]] = ["generators", "parallel", "version"] -DEPRECATED_ROOT_KEYS: Final[Sequence[str]] = ["gitignore_generated"] +DEPRECATED_ROOT_KEYS: Final[Sequence[str]] = ["gitignore_generated", "visualizer"] # Holds all inheritable configuration options. Currently: # - config.solution -# - config.visualizer # - config.random_salt class Config: # Used at each directory or testcase level. @@ -360,13 +334,6 @@ def parse_solution(p, x, path): return None return SolutionInvocation(p, x) - @staticmethod - def parse_visualizer(p, x, path): - assert_type("Visualizer", x, [type(None), str], path) - if x is None: - return None - return VisualizerInvocation(p, x) - @staticmethod def parse_random_salt(p, x, path): assert_type("Random_salt", x, [type(None), str], path) @@ -377,7 +344,6 @@ def parse_random_salt(p, x, path): INHERITABLE_KEYS: Final[Sequence] = [ # True: use an AC submission by default when the solution: key is not present. ("solution", True, parse_solution), - ("visualizer", None, parse_visualizer), ("random_salt", "", parse_random_salt), # Non-portable keys only used by BAPCtools: # The number of retries to run a generator when it fails, each time incrementing the {seed} @@ -386,7 +352,6 @@ def parse_random_salt(p, x, path): ] solution: SolutionInvocation - visualizer: Optional[VisualizerInvocation] random_salt: str retries: int @@ -428,14 +393,31 @@ def __init__(self, problem, key, name, yaml, parent): class TestcaseRule(Rule): - def __init__(self, problem, generator_config, key, name: str, yaml, parent, count_index): + def __init__( + self, + problem: Problem, + generator_config, + key, + name: str, + yaml: dict[str, Any], + parent, + count_index, + ): assert is_testcase(yaml) # if not None rule will be skipped during generation - self.parse_error = None + self.parse_error: Optional[str] = None # Whether this testcase is a sample. - self.sample = len(parent.path.parts) > 0 and parent.path.parts[0] == "sample" + self.sample: bool = len(parent.path.parts) > 0 and parent.path.parts[0] == "sample" + # each test case needs some kind of input + self.required_in: list[list[str]] = [[".in"]] + if self.sample: + # for samples a statement in file is also sufficient + self.required_in.append([".in.statement"]) + if problem.interactive or problem.multi_pass: + # if .interaction is supported that is also fine as long as input download is provided as well. + self.required_in.append([".interaction", ".in.download"]) # 1. Generator self.generator = None @@ -474,19 +456,17 @@ def __init__(self, problem, generator_config, key, name: str, yaml, parent, coun # root in /data self.root = self.path.parts[0] - if self.root == "bad": - message( - "bad is deprecated. 
Use {invalid_input,invalid_answer} instead.", - self.path, - color_type=MessageType.WARN, - ) - - if not config.COMPILED_FILE_NAME_REGEX.fullmatch(name + ".in"): - raise ParseException("Testcase does not have a valid name.") + # files to consider for hashing + hashes = {} try: - # files to consider for hashing - hashes = {} + if not config.COMPILED_FILE_NAME_REGEX.fullmatch(name + ".in"): + raise ParseException("Test case does not have a valid name.") + + if name == "test_group": + raise ParseException( + "Test case must not be named 'test_group', as this clashes with the group-level 'test_group.yaml'." + ) if yaml is None: raise ParseException( @@ -506,24 +486,42 @@ def __init__(self, problem, generator_config, key, name: str, yaml, parent, coun yaml = {"copy": yaml["generate"][:-3]} # checks - if not any(x in yaml for x in ["generate", "copy", "in", "interaction"]): + satisfied = False + msg = [] + for required in [[".generate"], [".copy"]] + self.required_in: + satisfied = satisfied or all(x[1:] in yaml for x in required) + msg.append(" and ".join([x[1:] for x in required])) + if not satisfied: + raise ParseException(f"Testcase requires at least one of: {', '.join(msg)}.") + if not problem.interactive and not problem.multi_pass and "interaction" in yaml: raise ParseException( - 'Testcase requires at least one key in "generate", "copy", "in", "interaction".' + "Testcase cannot have 'interaction' key for non-interactive/non-multi-pass problem." ) + if not self.sample: + for ext in config.KNOWN_SAMPLE_TESTCASE_EXTENSIONS: + if ext[1:] in yaml: + raise ParseException(f"Non-sample testcase cannot use '{ext[1:]}'.") if "submission" in yaml and "ans" in yaml: - raise ParseException('Testcase cannot specify both "submissions" and "ans".') + raise ParseException("Testcase cannot specify both 'submissions' and 'ans'.") if "count" in yaml and not isinstance(yaml["count"], int): value = yaml["count"] - raise ParseException(f'Testcase expected int for "count" but found {value}.') + raise ParseException(f"Testcase expected int for 'count' but found {value}.") # 1. generate if "generate" in yaml: assert_type("generate", yaml["generate"], str) if len(yaml["generate"]) == 0: - raise ParseException("`generate` must not be empty.") + raise ParseException("'generate' must not be empty.") - # replace count + # first replace {{constants}} command_string = yaml["generate"] + command_string = substitute( + command_string, + problem.settings.constants, + pattern=config.CONSTANT_SUBSTITUTE_REGEX, + ) + + # then replace {count} and {seed} if "{count}" in command_string: if "count" in yaml: command_string = command_string.replace( @@ -570,13 +568,18 @@ def __init__(self, problem, generator_config, key, name: str, yaml, parent, coun self.rule["copy"] = str(self.copy) for ext in config.KNOWN_TESTCASE_EXTENSIONS: if self.copy.with_suffix(ext).is_file(): - hashes[ext] = hash_file(self.copy.with_suffix(ext)) + hashes[ext] = hash_file_content(self.copy.with_suffix(ext)) - # 3. hardcoded + # 3. 
hardcoded strings (or, for the Test Case Configuration, a yaml mapping) for ext in config.KNOWN_TEXT_DATA_EXTENSIONS: if ext[1:] in yaml: value = yaml[ext[1:]] - assert_type(ext, value, str) + if ext == ".yaml": + assert_type(ext, value, dict) + value = write_yaml(value) + assert value is not None + else: + assert_type(ext, value, str) if len(value) > 0 and value[-1] != "\n": value += "\n" self.hardcoded[ext] = value @@ -584,9 +587,8 @@ def __init__(self, problem, generator_config, key, name: str, yaml, parent, coun if ".in" in self.hardcoded: self.in_is_generated = False self.rule["in"] = self.hardcoded[".in"] - for ext in config.KNOWN_TESTCASE_EXTENSIONS: - if ext in self.hardcoded: - hashes[ext] = hash_string(self.hardcoded[ext]) + for ext, value in self.hardcoded.items(): + hashes[ext] = hash_string(value) # Warn/Error for unknown keys. for key in yaml: @@ -601,29 +603,29 @@ def __init__(self, problem, generator_config, key, name: str, yaml, parent, coun color_type=MessageType.LOG, ) - if ".in" not in hashes: - generator_config.n_parse_error += 1 - # An error is shown during generate. - return - - # build ordered list of hashes we want to consider - hs = [hashes[ext] for ext in config.KNOWN_TESTCASE_EXTENSIONS if ext in hashes] - # combine hashes - if len(hs) == 1: - self.hash = hs[0] - else: - self.hash = combine_hashes(hs) + self.hash = combine_hashes_dict(hashes) if self.hash in generator_config.rules_cache: self.copy_of = generator_config.rules_cache[self.hash] else: generator_config.rules_cache[self.hash] = self + except ParseException as e: # For testcases we can handle the parse error locally since this does not influence much else self.parse_error = e.message generator_config.n_parse_error += 1 + if not any(all(ext in hashes for ext in required) for required in self.required_in): + generator_config.n_parse_error += 1 + # An error is shown during generate. 
+ + def _has_required_in(t, infile: Path) -> bool: + for required in t.required_in: + if all(infile.with_suffix(ext).is_file() for ext in required): + return True + return False + def link(t, problem, generator_config, bar, dst): src_dir = problem.path / "data" / t.path.parent src = src_dir / (t.name + ".in") @@ -697,58 +699,62 @@ def validate_in(t, problem: Problem, testcase: Testcase, meta_yaml: dict, bar: P ) return True - def validate_ans(t, problem: Problem, testcase: Testcase, meta_yaml: dict, bar: ProgressBar): + def validate_ans_and_out( + t, problem: Problem, testcase: Testcase, meta_yaml: dict, bar: ProgressBar + ): infile = problem.tmpdir / "data" / t.hash / "testcase.in" assert infile.is_file() if testcase.root == "invalid_input": return True - ansfile = problem.tmpdir / "data" / t.hash / "testcase.ans" - assert ansfile.is_file() + ansfile = infile.with_suffix(".ans") + if not ansfile.is_file(): + bar.error("No .ans file was generated!") + return False - if problem.interactive or problem.multi_pass: - if ansfile.stat().st_size != 0: - interactive = "interaction " if problem.interactive else "" - multi_pass = "multi-pass " if problem.multi_pass else "" - bar.warn(f".ans file for {interactive}{multi_pass}problem is expected to be empty.") - else: - size = ansfile.stat().st_size - if ( - size <= problem.limits.output * 1024 * 1024 - and problem.limits.output * 1024 * 1024 < 2 * size - ): # we already warn if the limit is exceeded - bar.warn( - f".ans file is {size / 1024 / 1024:.3f}MiB, which is close to output limit (set limits.output to at least {(2 * size + 1024 * 1024 - 1) // 1024 // 1024}MiB in problem.yaml)" - ) + outfile = infile.with_suffix(".out") + if not outfile.is_file() and testcase.root in ["invalid_output", "valid_output"]: + bar.error("No .out file was generated!") + return False - answer_validator_hashes = { - **testcase.validator_hashes(validate.AnswerValidator, bar), - **testcase.validator_hashes(validate.OutputValidator, bar), - } - if all(h in meta_yaml["answer_validator_hashes"] for h in answer_validator_hashes): - return True + ans_out_validator_hashes = testcase.validator_hashes(validate.AnswerValidator, bar).copy() + output_validator_hashes = testcase.validator_hashes(validate.OutputValidator, bar) - if not testcase.validate_format( - validate.Mode.ANSWER, - bar=bar, - warn_instead_of_error=config.args.no_validators, - ): - if not config.args.no_validators: - bar.debug("Use generate --no-validators to ignore validation results.") - bar.done(False) - return False - else: - for h in answer_validator_hashes: - meta_yaml["answer_validator_hashes"][h] = answer_validator_hashes[h] - write_yaml( - meta_yaml, - problem.tmpdir / "data" / t.hash / "meta_.yaml", - allow_yamllib=True, - ) + mode = validate.Mode.ANSWER + if testcase.root == "invalid_answer": + mode = validate.Mode.INVALID + elif testcase.root == "invalid_output": + ans_out_validator_hashes.update(output_validator_hashes) + mode = validate.Mode.INVALID + elif testcase.root == "valid_output" or outfile.is_file(): + ans_out_validator_hashes.update(output_validator_hashes) + mode = validate.Mode.VALID_OUTPUT + + if all(h in meta_yaml["ans_out_validator_hashes"] for h in ans_out_validator_hashes): + return True + + if not testcase.validate_format( + mode, + bar=bar, + warn_instead_of_error=config.args.no_validators, + ): + if not config.args.no_validators: + bar.debug("Use generate --no-validators to ignore validation results.") + bar.done(False) + return False + else: + for h in ans_out_validator_hashes: + 
meta_yaml["ans_out_validator_hashes"][h] = ans_out_validator_hashes[h] + meta_yaml["visualizer_hash"] = dict() + write_yaml( + meta_yaml, + problem.tmpdir / "data" / t.hash / "meta_.yaml", + allow_yamllib=True, + ) return True - def generate(t, problem, generator_config, parent_bar): + def generate(t, problem: Problem, generator_config, parent_bar): bar = parent_bar.start(str(t.path)) t.generate_success = False @@ -792,7 +798,8 @@ def init_meta(): "generated_extensions": [], "input_validator_hashes": dict(), "solution_hash": dict(), - "answer_validator_hashes": dict(), + "interactor_hash": dict(), + "ans_out_validator_hashes": dict(), "visualizer_hash": dict(), } meta_yaml["rule"] = t.rule @@ -876,7 +883,7 @@ def generate_from_rule(): if not infile.is_file() or meta_yaml.get("rule_hashes") != rule_hashes: # clear all generated files - shutil.rmtree(cwd) + shutil.rmtree(cwd, ignore_errors=True) cwd.mkdir(parents=True, exist_ok=True) meta_yaml = init_meta() @@ -903,15 +910,13 @@ def generate_from_rule(): # Step 3: Write hardcoded files. for ext, contents in t.hardcoded.items(): - if contents == "" and t.root not in ["bad", "invalid_input"]: - bar.error(f"Hardcoded {ext} data must not be empty!") - return False - else: - infile.with_suffix(ext).write_text(contents) + # substitute in contents? -> No! + infile.with_suffix(ext).write_text(contents) # Step 4: Error if infile was not generated. - if not infile.is_file(): - bar.error("No .in file was generated!") + if not t._has_required_in(infile): + msg = ", ".join(" and ".join(required) for required in t.required_in) + bar.error(f"No {msg} file was generated!") return False # Step 5: save which files where generated @@ -928,10 +933,10 @@ def generate_from_rule(): else: check_deterministic(False) - assert infile.is_file(), f"Failed to generate in file: {infile}" + assert t._has_required_in(infile), f"Failed to generate in file: {infile.name}" return True - def generate_from_solution(): + def generate_from_solution(testcase: Testcase, bar: ProgressBar): nonlocal meta_yaml if testcase.root in [*config.INVALID_CASE_DIRECTORIES, "valid_output"]: @@ -950,30 +955,42 @@ def generate_from_solution(): "solution": None, } - def needed(ext): + def needed(ext, interactor_hash=None): if ext in meta_yaml["generated_extensions"]: return False if not infile.with_suffix(ext).is_file(): return True + if ( + interactor_hash is not None + and meta_yaml.get("interactor_hash") != interactor_hash + ): + return True return meta_yaml.get("solution_hash") != solution_hash used_solution = False changed_ans = False - if problem.interactive or problem.multi_pass: - # Generate empty ans file for interactive/multi-pass problems + if not problem.settings.ans_is_output: + # Generate empty ans file if ".ans" not in meta_yaml["generated_extensions"]: - if not ansfile.is_file() or ansfile.stat().st_size != 0: + if not ansfile.is_file() and (problem.interactive or problem.multi_pass): ansfile.write_text("") changed_ans = True - # For interactive/multi-pass problems, run the solution and generate a .interaction. - if ( - t.config.solution - and (testcase.root == "sample" or config.args.interaction) - and needed(".interaction") - ): - if not t.config.solution.run_interaction(bar, cwd, t): - return False - used_solution = True + # For interactive/multi-pass problems, run the solution and generate a .interaction if necessary. 
+ if problem.interactive or problem.multi_pass: + interactor_hash = testcase.validator_hashes(validate.OutputValidator, bar) + if ( + t.config.solution + and (testcase.root == "sample" or config.args.interaction) + and needed(".interaction", interactor_hash) + and not any( + infile.with_suffix(ext).is_file() + for ext in [".out", ".in.statement", ".ans.statement"] + ) + ): + if not t.config.solution.generate_interaction(bar, cwd, t): + return False + used_solution = True + meta_yaml["interactor_hash"] = interactor_hash else: # Generate a .ans if not already generated by earlier steps. if needed(".ans"): @@ -991,7 +1008,7 @@ def needed(ext): if used_solution: meta_yaml["solution_hash"] = solution_hash if changed_ans: - meta_yaml["answer_validator_hashes"] = dict() + meta_yaml["ans_out_validator_hashes"] = dict() meta_yaml["visualizer_hash"] = dict() if changed_ans or used_solution: write_yaml(meta_yaml, meta_path, allow_yamllib=True) @@ -999,27 +1016,118 @@ def needed(ext): assert ansfile.is_file(), f"Failed to generate ans file: {ansfile}" return True - def generate_visualization(): + def generate_visualization(testcase: Testcase, bar: ProgressBar): nonlocal meta_yaml - if not t.config.visualizer: + if testcase.root in config.INVALID_CASE_DIRECTORIES: return True if config.args.no_visualizer: return True + # Generate visualization + in_path = cwd / "testcase.in" + ans_path = cwd / "testcase.ans" + out_path = cwd / "testcase.out" + assert in_path.is_file() + assert ans_path.is_file() + + feedbackdir = in_path.with_suffix(".feedbackdir") + image_files = [f"judgeimage{ext}" for ext in config.KNOWN_VISUALIZER_EXTENSIONS] + [ + f"teamimage{ext}" for ext in config.KNOWN_VISUALIZER_EXTENSIONS + ] + + def use_feedback_image(feedbackdir: Path, source: str) -> None: + for name in image_files: + path = feedbackdir / name + if path.exists(): + ensure_symlink(in_path.with_suffix(path.suffix), path) + bar.log(f"Using {name} from {source} as visualization") + return + + visualizer: Optional[visualize.AnyVisualizer] = problem.visualizer( + visualize.InputVisualizer + ) + output_visualizer = problem.visualizer(visualize.OutputVisualizer) + if output_visualizer is not None: + if out_path.is_file() or problem.settings.ans_is_output: + if visualizer is None or out_path.is_file(): + visualizer = output_visualizer + if not out_path.is_file(): + assert problem.settings.ans_is_output + out_path = ans_path + + if visualizer is None: + for ext in config.KNOWN_VISUALIZER_EXTENSIONS: + in_path.with_suffix(ext).unlink(True) + use_feedback_image(feedbackdir, "validator") + return True + + visualizer_args = testcase.test_case_yaml_args(visualizer, bar) visualizer_hash = { - "visualizer_hash": t.config.visualizer.hash(), - "visualizer": t.config.visualizer.cache_command(), + "visualizer_hash": visualizer.hash, + "visualizer_args": visualizer_args, } if meta_yaml.get("visualizer_hash") == visualizer_hash: return True - # Generate visualization - t.config.visualizer.run(bar, cwd) + for ext in config.KNOWN_VISUALIZER_EXTENSIONS: + in_path.with_suffix(ext).unlink(True) - meta_yaml["visualizer_hash"] = visualizer_hash - write_yaml(meta_yaml, meta_path, allow_yamllib=True) + if isinstance(visualizer, visualize.InputVisualizer): + result = visualizer.run(in_path, ans_path, cwd, visualizer_args) + else: + feedbackcopy = in_path.with_suffix(".feedbackcopy") + shutil.rmtree(feedbackcopy) + + def skip_images(src: str, content: list[str]) -> list[str]: + return [] if src != str(feedbackdir) else image_files + + 
shutil.copytree(feedbackdir, feedbackcopy, ignore=skip_images) + + result = visualizer.run( + in_path, + ans_path, + out_path if not problem.interactive else None, + feedbackcopy, + visualizer_args, + ) + if result.status: + use_feedback_image(feedbackdir, "output_visualizer") + + if result.status == ExecStatus.TIMEOUT: + bar.debug(f"{Style.RESET_ALL}-> {shorten_path(problem, cwd)}") + bar.error( + f"{type(visualizer).visualizer_type.capitalize()} Visualizer TIMEOUT after {result.duration}s" + ) + elif not result.status: + bar.debug(f"{Style.RESET_ALL}-> {shorten_path(problem, cwd)}") + bar.error( + f"{type(visualizer).visualizer_type.capitalize()} Visualizer failed", result.err + ) + + if result.status and config.args.error and result.err: + bar.log("stderr", result.err) + + if result.status: + meta_yaml["visualizer_hash"] = visualizer_hash + write_yaml(meta_yaml, meta_path, allow_yamllib=True) + + # errors in the visualizer are not critical + return True + + def generate_empty_interactive_sample_ans(): + if not t.sample: + return True + if not problem.interactive and not problem.multi_pass: + return True + for ext in ["", ".statement", ".download"]: + ans_ext_file = infile.with_suffix(f".ans{ext}") + if ans_ext_file.exists(): + return True + if infile.with_suffix(f".in{ext}").exists(): + ans_ext_file.write_text("") + return True return True def copy_generated(): @@ -1064,39 +1172,36 @@ def copy_generated(): # both source and target do not exist pass - def add_testdata_to_cache(): - # Used to identify generated testcases + def add_test_case_to_cache(): + # Used to identify generated test cases generator_config.hashed_in.add(hash_file_content(infile)) - # Store the generated testdata for deduplication test cases. + # Store the hashes of the generated files for this test case to detect duplicate test cases. hashes = {} - # remove files that should not be considered for this testcase - extensions = list(config.KNOWN_TESTCASE_EXTENSIONS) - if t.root not in [*config.INVALID_CASE_DIRECTORIES[1:], "valid_output"]: - extensions.remove(".ans") - if t.root not in [*config.INVALID_CASE_DIRECTORIES[2:], "valid_output"]: - extensions.remove(".out") + # consider specific files for the uniqueness of this testcase + relevant_files = { + "invalid_input": [".in"], + "invalid_answer": [".in", ".ans"], + "invalid_output": [".in", ".ans", ".out"], + "valid_output": [".in", ".ans", ".out"], + } + relevant_files_default = [".in"] if problem.settings.ans_is_output else [".in", ".ans"] + extensions = relevant_files.get(t.root, relevant_files_default) for ext in extensions: if target_infile.with_suffix(ext).is_file(): - hashes[ext] = hash_file(target_infile.with_suffix(ext)) - - # build ordered list of hashes we want to consider - hs = [hashes[ext] for ext in extensions if ext in hashes] + hashes[ext] = hash_file_content(target_infile.with_suffix(ext)) # combine hashes - if len(hs) == 1: - test_hash = hs[0] - else: - test_hash = combine_hashes(hs) + test_hash = combine_hashes_dict(hashes) # check for duplicates - if test_hash not in generator_config.generated_testdata: - generator_config.generated_testdata[test_hash] = t + if test_hash not in generator_config.generated_test_cases: + generator_config.generated_test_cases[test_hash] = t else: bar.warn( - f"Testcase {t.path} is equal to {generator_config.generated_testdata[test_hash].path}." + f"Testcase {t.path} is equal to {generator_config.generated_test_cases[test_hash].path}." 
) # Step 1: handle non unique generate entry @@ -1115,29 +1220,35 @@ def add_testdata_to_cache(): if not generate_from_rule(): return - # Step 3: check .in if needed - testcase = Testcase(problem, infile, short_path=t.path / t.name) - if not t.validate_in(problem, testcase, meta_yaml, bar): - return + if infile.is_file(): + # Step 3: check .in if needed + testcase = Testcase(problem, infile, short_path=t.path / t.name) + if not t.validate_in(problem, testcase, meta_yaml, bar): + return - # Step 4: generate .ans and .interaction if needed - if not generate_from_solution(): - return + # Step 4: generate .ans and .interaction if needed + if not generate_from_solution(testcase, bar): + return - # Step 5: validate .ans if needed - if not t.validate_ans(problem, testcase, meta_yaml, bar): - return + # Step 5: validate .ans (and .out if it exists) + if not t.validate_ans_and_out(problem, testcase, meta_yaml, bar): + return + + # Step 6: generate visualization if needed + if not generate_visualization(testcase, bar): + return - # Step 6: generate visualization if needed - if not generate_visualization(): + # Step 7: for interactive and/or multi-pass samples, generate empty .ans if it does not exist + if not generate_empty_interactive_sample_ans(): return - # Step 7: copy all generated files + # Step 8: copy all generated files copy_generated() # Note that we set this to true even if not all files were overwritten -- a different log/warning message will be displayed for that. t.generate_success = True - add_testdata_to_cache() + if infile.is_file(): + add_test_case_to_cache() if config.args.action != "generate": bar.logged = True # Disable redundant 'up to date' message in run mode. bar.done(message="SKIPPED: up to date") @@ -1205,11 +1316,7 @@ def __init__( color_type=MessageType.LOG, ) - if "testdata.yaml" in yaml: - self.testdata_yaml = yaml["testdata.yaml"] - else: - self.testdata_yaml = False - + self.test_group_yaml: Any = yaml.get("test_group.yaml", False) self.numbered = False # List of child TestcaseRule/Directory objects, filled by parse(). @@ -1294,7 +1401,7 @@ def walk(self, testcase_f=None, dir_f=True, *, dir_last=False): def generate(d, problem, generator_config, bar): # Generate the current directory: # - Create the directory. - # - Write testdata.yaml. + # - Write test_group.yaml. # - Link included testcases. # - Input of included testcases are re-validated with the # directory-specific input validator flags. @@ -1304,29 +1411,29 @@ def generate(d, problem, generator_config, bar): dir_path = problem.path / "data" / d.path dir_path.mkdir(parents=True, exist_ok=True) - # Write the testdata.yaml, or remove it when the key is set but empty. - testdata_yaml_path = dir_path / "testdata.yaml" - if d.testdata_yaml: - generator_config.known_files.add(testdata_yaml_path) - yaml_text = write_yaml(dict(d.testdata_yaml)) + # Write the test_group.yaml, or remove it when the key is set but empty. 
+ test_group_yaml_path = dir_path / "test_group.yaml" + if d.test_group_yaml: + generator_config.known_files.add(test_group_yaml_path) + yaml_text = write_yaml(dict(d.test_group_yaml)) - if testdata_yaml_path.is_file(): - if yaml_text == testdata_yaml_path.read_text(): + if test_group_yaml_path.is_file(): + if yaml_text == test_group_yaml_path.read_text(): # identical -> skip pass else: # different -> overwrite - generator_config.remove(testdata_yaml_path) - testdata_yaml_path.write_text(yaml_text) - bar.log("CHANGED: testdata.yaml") + generator_config.remove(test_group_yaml_path) + test_group_yaml_path.write_text(yaml_text) + bar.log("CHANGED: test_group.yaml") else: # new file -> create it - testdata_yaml_path.write_text(yaml_text) - bar.log("NEW: testdata.yaml") - elif d.testdata_yaml == "" and testdata_yaml_path.is_file(): + test_group_yaml_path.write_text(yaml_text) + bar.log("NEW: test_group.yaml") + elif d.test_group_yaml == "" and test_group_yaml_path.is_file(): # empty -> remove it - generator_config.remove(testdata_yaml_path) - bar.log("REMOVED: testdata.yaml") + generator_config.remove(test_group_yaml_path) + bar.log("REMOVED: test_group.yaml") bar.done() def generate_includes(d, problem, generator_config, bar): @@ -1363,12 +1470,12 @@ def generate_includes(d, problem, generator_config, bar): meta_yaml = read_yaml(meta_path) testcase = Testcase(problem, infile, short_path=new_case) - # Step 1: validate input + # Step 1: validate .in if not t.validate_in(problem, testcase, meta_yaml, bar): continue - # Step 2: validate answer - if not t.validate_ans(problem, testcase, meta_yaml, bar): + # Step 2: validate .ans (and .out if it exists) + if not t.validate_ans_and_out(problem, testcase, meta_yaml, bar): continue t.link(problem, generator_config, bar, new_infile) @@ -1376,13 +1483,13 @@ def generate_includes(d, problem, generator_config, bar): # Returns the numbered name -def numbered_testcase_name(basename, i, n): +def numbered_test_case_name(base_name, i, n): width = len(str(n)) number_prefix = f"{i:0{width}}" - if basename: - return number_prefix + "-" + basename + if base_name: + return number_prefix + "-" + base_name else: - assert basename is None or basename == "" + assert base_name is None or base_name == "" return number_prefix @@ -1427,25 +1534,25 @@ def __init__(self, problem, restriction=None): yaml_path = self.problem.path / "generators" / "generators.yaml" self.n_parse_error = 0 - # A map of paths `secret/testgroup/testcase` to their canonical TestcaseRule. + # A map of paths `secret/test_group/test_case` to their canonical TestcaseRule. # For generated cases this is the rule itself. - # For included cases, this is the 'resolved' location of the testcase that is included. + # For included cases, this is the 'resolved' location of the test case that is included. self.known_cases = dict() - # A map of paths `secret/testgroup` to Directory rules. + # A map of paths `secret/test_group` to Directory rules. self.known_directories = dict() # Used for cleanup self.known_files = set() - # A map from key to (is_included, list of testcases and directories), + # A map from key to (is_included, list of test cases and directories), # used for `include` statements. self.known_keys = collections.defaultdict[str, tuple[bool, list[TestcaseRule | Directory]]]( lambda: (False, []) ) # A set of testcase rules, including seeds. self.rules_cache = dict() - # The set of generated testcases keyed by hash(testdata). 
- self.generated_testdata = dict() + # The set of generated test cases keyed by hash(test_case). + self.generated_test_cases = dict() # Path to the trash directory for this run - self.trashdir: Optional[Path] = None + self.trash_dir: Optional[Path] = None # Set of hash(.in) for all generated testcases self.hashed_in = set() # Files that should be processed @@ -1508,8 +1615,8 @@ def add_known(obj): color_type=MessageType.ERROR, ) - num_numbered_testcases = 0 - testcase_id = 0 + num_numbered_test_cases = 0 + test_case_id = 0 def parse_count(yaml, warn_for=None): if not has_count(yaml): @@ -1545,7 +1652,7 @@ def parse_count(yaml, warn_for=None): # Count the number of testcases in the given directory yaml. # This parser is quite forgiving, def count(yaml): - nonlocal num_numbered_testcases + nonlocal num_numbered_test_cases ds = yaml.get("data") if isinstance(ds, dict): ds = [ds] @@ -1558,7 +1665,7 @@ def count(yaml): if isinstance(elem, dict): for key in elem: if is_testcase(elem[key]) and numbered: - num_numbered_testcases += parse_count(elem[key]) + num_numbered_test_cases += parse_count(elem[key]) elif is_directory(elem[key]): count(elem[key]) @@ -1576,7 +1683,7 @@ def parse(key: str, name_gen: Callable[[], str], yaml: dict, parent: AnyDirector if is_testcase(yaml): if isinstance(parent, RootDirectory): - raise ParseException("Testcase must be inside Directory", name) + raise ParseException("Test case must be inside a Directory.", name) count = parse_count(yaml, parent.path / name) @@ -1612,23 +1719,23 @@ def parse(key: str, name_gen: Callable[[], str], yaml: dict, parent: AnyDirector raise ParseException("Duplicate entry", d.path) add_known(d) - # Parse child directories/testcases. + # Parse child test cases/groups. if "data" in yaml and yaml["data"]: data = yaml["data"] if isinstance(yaml["data"], list) else [yaml["data"]] - # Count the number of child testgroups. - num_testgroups = 0 + # Count the number of child test groups. + num_test_groups = 0 for dictionary in data: assert_type("Elements of data", dictionary, dict, d.path) for key in dictionary.keys(): assert_type("Key of data", key, [type(None), str], d.path / str(key)) for child_name, child_yaml in sorted(dictionary.items()): if is_directory(child_yaml): - num_testgroups += 1 + num_test_groups += 1 - testgroup_id = 0 + test_group_id = 0 for dictionary in data: for key in dictionary: - assert_type("Testcase/directory name", key, [type(None), str], d.path) + assert_type("Test case/group name", key, [type(None), str], d.path) # Process named children alphabetically, but not in the root directory. # There, process in the 'natural order'. 
@@ -1665,24 +1772,24 @@ def parse(key: str, name_gen: Callable[[], str], yaml: dict, parent: AnyDirector if d.numbered: if is_directory(child_yaml): - def next_testgroup_name(): - nonlocal testgroup_id - testgroup_id += 1 - return numbered_testcase_name( - child_key, testgroup_id, num_testgroups + def next_test_group_name(): + nonlocal test_group_id + test_group_id += 1 + return numbered_test_case_name( + child_key, test_group_id, num_test_groups ) - child_name = next_testgroup_name + child_name = next_test_group_name elif is_testcase(child_yaml): - def next_testcase_name(): - nonlocal testcase_id - testcase_id += 1 - return numbered_testcase_name( - child_key, testcase_id, num_numbered_testcases + def next_test_case_name(): + nonlocal test_case_id + test_case_id += 1 + return numbered_test_case_name( + child_key, test_case_id, num_numbered_test_cases ) - child_name = next_testcase_name + child_name = next_test_case_name else: # Use error will be given inside parse(child). child_name = lambda: "" # noqa: E731 # TODO this can probably be prettier @@ -1691,7 +1798,7 @@ def next_testcase_name(): child_name = lambda: child_key # noqa: E731 # TODO this can probably be prettier if not child_name(): raise ParseException( - "Unnumbered testcases must not have an empty key", + "Unnumbered test cases must not have an empty key", d.path, ) c = parse(child_key, child_name, child_yaml, d) @@ -1732,7 +1839,7 @@ def add_included_case(t: TestcaseRule): assert_type("include", include, str, d.path) if "/" in include: message( - f"Include {include} should be a testcase/testgroup key, not a path.", + f"Include {include} should be a test case/group key, not a path.", "generators.yaml", d.path, color_type=MessageType.ERROR, @@ -1762,7 +1869,7 @@ def add_included_case(t: TestcaseRule): pass else: message( - f"Unknown include key {include} does not refer to a previous testcase.", + f"Unknown include key {include} does not refer to a previous test case.", "generators.yaml", d.path, color_type=MessageType.ERROR, @@ -1775,7 +1882,6 @@ def add_included_case(t: TestcaseRule): def build(self, build_visualizers=True, skip_double_build_warning=False): generators_used: set[Path] = set() solutions_used: set[Path] = set() - visualizers_used: set[Path] = set() # Collect all programs that need building. # Also, convert the default submission into an actual Invocation. @@ -1796,16 +1902,14 @@ def collect_programs(t): default_solution = DefaultSolutionInvocation(self) t.config.solution = default_solution solutions_used.add(t.config.solution.program_path) - if build_visualizers and t.config.visualizer: - visualizers_used.add(t.config.visualizer.program_path) self.root_dir.walk(collect_programs, dir_f=None) def build_programs( - program_type: type[program.Generator | program.Visualizer | run.Submission], - program_paths: set[Path], + program_type: type[program.Generator | run.Submission], + program_paths: Iterable[Path], ): - programs = list[program.Generator | program.Visualizer | run.Submission]() + programs = list[program.Generator | run.Submission]() for program_path in program_paths: path = self.problem.path / program_path if program_type is program.Generator and program_path in self.generators: @@ -1839,20 +1943,17 @@ def build_program(p): # TODO: Consider building all types of programs in parallel as well. 
build_programs(program.Generator, generators_used) build_programs(run.Submission, solutions_used) - build_programs(program.Visualizer, visualizers_used) + if build_visualizers: + self.problem.visualizer(visualize.InputVisualizer) + self.problem.visualizer(visualize.OutputVisualizer) self.problem.validators(validate.InputValidator) - if not self.problem.interactive and not self.problem.multi_pass: - self.problem.validators(validate.AnswerValidator) + self.problem.validators(validate.AnswerValidator) self.problem.validators(validate.OutputValidator) def cleanup_build_failures(t): if t.config.solution and t.config.solution.program is None: t.config.solution = None - if not build_visualizers or ( - t.config.visualizer and t.config.visualizer.program is None - ): - t.config.visualizer = None self.root_dir.walk(cleanup_build_failures, dir_f=None) @@ -1907,9 +2008,9 @@ def generate_copies_and_includes(d): # move a file or into the trash directory def remove(self, src): - if self.trashdir is None: - self.trashdir = self.problem.tmpdir / "trash" / secrets.token_hex(4) - dst = self.trashdir / src.absolute().relative_to((self.problem.path / "data").absolute()) + if self.trash_dir is None: + self.trash_dir = self.problem.tmpdir / "trash" / secrets.token_hex(4) + dst = self.trash_dir / src.absolute().relative_to((self.problem.path / "data").absolute()) dst.parent.mkdir(parents=True, exist_ok=True) shutil.move(src, dst) @@ -1946,8 +2047,8 @@ def clean_up(self): bar = ProgressBar("Clean Up", max_len=-1) self._remove_unknown(self.problem.path / "data", bar) - if self.trashdir is not None: - bar.warn("Some files were changed/removed.", f"-> {self.trashdir}") + if self.trash_dir is not None: + bar.warn("Some files were changed/removed.", f"-> {self.trash_dir}") bar.finalize() # write a gitignore file to ignore everything in data/ except data/sample/ @@ -2099,22 +2200,22 @@ def reorder(self): assert "data" in d.yaml assert isinstance(d.yaml["data"], list) - # dont move unknown cases/directories or testcases with count - testnodes = { + # don't move unknown test cases/groups, or test cases with count + test_nodes = { id(c.yaml): str(c.path) for c in d.data if c.path in testcase_paths and not has_count(c.yaml) } - others = [e for e in d.yaml["data"] if id(next(iter(e.values()))) not in testnodes] + others = [e for e in d.yaml["data"] if id(next(iter(e.values()))) not in test_nodes] class TestcaseResult: def __init__(self, yaml): self.yaml = yaml - self.testnode = testnodes[id(next(iter(yaml.values())))] + self.test_node = test_nodes[id(next(iter(yaml.values())))] self.scores = [] self.result = [] for i in range(len(submissions)): - verdict = verdict_table.results[i][self.testnode] + verdict = verdict_table.results[i][self.test_node] # moving TLE cases to the front is most important to save resources # RTE are less reliable and therefore less important than WA if verdict == Verdict.TIME_LIMIT_EXCEEDED: @@ -2123,10 +2224,10 @@ def __init__(self, yaml): self.scores.append((i, 4)) elif verdict == Verdict.RUNTIME_ERROR: self.scores.append((i, 3)) - self.result.append(verdict_table._get_verdict(i, self.testnode)) + self.result.append(verdict_table._get_verdict(i, self.test_node)) def __str__(self): - return f"{Fore.CYAN}Reorder{Style.RESET_ALL}: {self.testnode:<{max_testcase_len}} {''.join(self.result)}" + return f"{Fore.CYAN}Reorder{Style.RESET_ALL}: {self.test_node:<{max_testcase_len}} {''.join(self.result)}" def score(self, weights): return sum(weights[i] * x for i, x in self.scores) @@ -2146,7 +2247,9 @@ def 
update(self, weights): return weights todo = [ - TestcaseResult(e) for e in d.yaml["data"] if id(next(iter(e.values()))) in testnodes + TestcaseResult(e) + for e in d.yaml["data"] + if id(next(iter(e.values()))) in test_nodes ] # TODO: ProgressBar? diff --git a/bin/interactive.py b/bin/interactive.py index 83390c365..bcdc2d39d 100644 --- a/bin/interactive.py +++ b/bin/interactive.py @@ -1,13 +1,17 @@ +import os import signal -import time import subprocess import sys import threading +import time + +from contextlib import nullcontext +from pathlib import Path from typing import Final, Literal, Optional, TYPE_CHECKING import config -from util import * import validate +from util import * from verdicts import Verdict if TYPE_CHECKING: @@ -25,14 +29,14 @@ def run_interactive_testcase( team_error: Literal[False] | None = False, *, # False/None: no output - # True: stdout + # True: stderr # else: path interaction: Optional[bool | Path] = False, submission_args: Optional[list[str]] = None, bar: Optional[ProgressBar] = None, ): output_validators = run.problem.validators(validate.OutputValidator) - if len(output_validators) != 1: + if not output_validators: return None output_validator = output_validators[0] @@ -54,7 +58,7 @@ def get_validator_command(): run.testcase.ans_path.resolve(), run.feedbackdir.resolve(), ] - + run.testcase.testdata_yaml_validator_args( + + run.testcase.test_case_yaml_args( output_validator, bar or PrintBar("Run interactive test case"), ) @@ -165,6 +169,8 @@ def get_validator_command(): verdict = Verdict.VALIDATOR_CRASH break + run._visualize_output(bar or PrintBar("Visualize interaction")) + if tle_result is None: # Set result.err to validator error and result.out to team error. return ExecResult( @@ -190,15 +196,19 @@ def get_validator_command(): # - Close remaining program + write end of pipe # - Close remaining read end of pipes - interaction_file = None # TODO: Print interaction when needed. - if interaction and interaction is not True: + old_handler = None + if isinstance(interaction, Path): assert not interaction.is_relative_to(run.tmpdir) - interaction_file = interaction.open("a") - interaction = True - - # Connect pipes with tee. - TEE_CODE = R""" + elif interaction: + assert threading.current_thread() is threading.main_thread() + with ( + interaction.open("a") + if isinstance(interaction, Path) + else nullcontext(None) as interaction_file + ): + # Connect pipes with tee. + TEE_CODE = R""" import sys c = sys.argv[1] new = True @@ -213,221 +223,232 @@ def get_validator_command(): new = l=='\n' """ - pass_id = 0 - max_duration = 0 - tle_result = None - while True: - pass_id += 1 - validator_command = get_validator_command() - validator = subprocess.Popen( - validator_command, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - # TODO: Make a flag to pass validator error directly to terminal. 
- stderr=subprocess.PIPE if validator_error is False else None, - cwd=validator_dir, - pipesize=BUFFER_SIZE, - preexec_fn=limit_setter(validator_command, validation_time, validation_memory, 0), - ) - validator_pid = validator.pid - # add all programs to the same group (for simplicity we take the pid of the validator) - # then we can wait for all program ins the same group - gid = validator_pid - - assert validator.stdin and validator.stdout - - if interaction: - team_tee = subprocess.Popen( - ["python3", "-c", TEE_CODE, ">"], + pass_id = 0 + max_duration = 0 + tle_result = None + while True: + pass_id += 1 + validator_command = get_validator_command() + validator = subprocess.Popen( + validator_command, stdin=subprocess.PIPE, - stdout=validator.stdin, - stderr=interaction_file, - pipesize=BUFFER_SIZE, - preexec_fn=limit_setter(None, None, None, gid), - ) - team_tee_pid = team_tee.pid - val_tee = subprocess.Popen( - ["python3", "-c", TEE_CODE, "<"], - stdin=validator.stdout, stdout=subprocess.PIPE, - stderr=interaction_file, + # TODO: Make a flag to pass validator error directly to terminal. + stderr=subprocess.PIPE if validator_error is False else None, + cwd=validator_dir, pipesize=BUFFER_SIZE, - preexec_fn=limit_setter(None, None, None, gid), + preexec_fn=limit_setter(validator_command, validation_time, validation_memory, 0), ) - val_tee_pid = val_tee.pid - - submission = subprocess.Popen( - submission_command, - stdin=(val_tee if interaction else validator).stdout, - stdout=(team_tee if interaction else validator).stdin, - stderr=subprocess.PIPE if team_error is False else None, - cwd=submission_dir, - pipesize=BUFFER_SIZE, - preexec_fn=limit_setter(submission_command, timeout, memory, gid), - ) - submission_pid = submission.pid - - stop_kill_handler = threading.Event() - submission_time: Optional[float] = None - - def kill_handler_function(): - if stop_kill_handler.wait(timeout + 1): - return - nonlocal submission_time - submission_time = timeout + 1 - try: - os.kill(submission_pid, signal.SIGKILL) - except ProcessLookupError: - pass - if validation_time > timeout and stop_kill_handler.wait(validation_time - timeout): - return - os.killpg(gid, signal.SIGKILL) - - kill_handler = threading.Thread(target=kill_handler_function, daemon=True) - kill_handler.start() - - # Will be filled in the loop below. - validator_status = None - submission_status = None - first = None - - # Wait for first to finish - left = 4 if interaction else 2 - first_done = True - while left > 0: - pid, status, rusage = os.wait4(-gid, 0) - - # On abnormal exit (e.g. from calling abort() in an assert), we set status to -1. - status = os.WEXITSTATUS(status) if os.WIFEXITED(status) else -1 - - if pid == validator_pid: - if first is None: - first = "validator" - validator_status = status - - # Close the output stream. - validator.stdout.close() - if interaction: - assert val_tee.stdout - val_tee.stdout.close() - - # Kill the team submission and everything else in case we already know it's WA. 
- if first_done and validator_status != config.RTV_AC: - stop_kill_handler.set() + validator_pid = validator.pid + # add all programs to the same group (for simplicity we take the pid of the validator) + # then we can wait for all programs in the same group + gid = validator_pid + + if interaction is True: + + def interrupt_handler(sig: Any, frame: Any) -> None: os.killpg(gid, signal.SIGKILL) - first_done = False - elif pid == submission_pid: - if first is None: - first = "submission" - submission_status = status - - # Close the output stream. - validator.stdin.close() - if interaction: - assert team_tee.stdin - team_tee.stdin.close() - - # Possibly already written by the alarm. - if submission_time is None: - submission_time = rusage.ru_utime + rusage.ru_stime - - first_done = False - elif interaction: - if pid == team_tee_pid or pid == val_tee_pid: + if callable(old_handler): + old_handler(sig, frame) + + old_handler = signal.signal(signal.SIGINT, interrupt_handler) + + assert validator.stdin and validator.stdout + + if interaction: + team_tee = subprocess.Popen( + [sys.executable, "-c", TEE_CODE, ">"], + stdin=subprocess.PIPE, + stdout=validator.stdin, + stderr=interaction_file or True, + pipesize=BUFFER_SIZE, + preexec_fn=limit_setter(None, None, None, gid), + ) + team_tee_pid = team_tee.pid + val_tee = subprocess.Popen( + [sys.executable, "-c", TEE_CODE, "<"], + stdin=validator.stdout, + stdout=subprocess.PIPE, + stderr=interaction_file or True, + pipesize=BUFFER_SIZE, + preexec_fn=limit_setter(None, None, None, gid), + ) + val_tee_pid = val_tee.pid + + submission = subprocess.Popen( + submission_command, + stdin=(val_tee if interaction else validator).stdout, + stdout=(team_tee if interaction else validator).stdin, + stderr=subprocess.PIPE if team_error is False else None, + cwd=submission_dir, + pipesize=BUFFER_SIZE, + preexec_fn=limit_setter(submission_command, timeout, memory, gid), + ) + submission_pid = submission.pid + + stop_kill_handler = threading.Event() + submission_time: Optional[float] = None + + def kill_handler_function(): + if stop_kill_handler.wait(timeout + 1): + return + nonlocal submission_time + submission_time = timeout + 1 + try: + os.kill(submission_pid, signal.SIGKILL) + except ProcessLookupError: pass + if validation_time > timeout and stop_kill_handler.wait(validation_time - timeout): + return + os.killpg(gid, signal.SIGKILL) + + kill_handler = threading.Thread(target=kill_handler_function, daemon=True) + kill_handler.start() + + # Will be filled in the loop below. + validator_status = None + submission_status = None + first = None + + # Wait for first to finish + left = 4 if interaction else 2 + first_done = True + while left > 0: + pid, status, rusage = os.wait4(-gid, 0) + + # On abnormal exit (e.g. from calling abort() in an assert), we set status to -1. + status = os.WEXITSTATUS(status) if os.WIFEXITED(status) else -1 + + if pid == validator_pid: + if first is None: + first = "validator" + validator_status = status + + # Close the output stream. + validator.stdout.close() + if interaction: + assert val_tee.stdout + val_tee.stdout.close() + + # Kill the team submission and everything else in case we already know it's WA. + if first_done and validator_status != config.RTV_AC: + stop_kill_handler.set() + os.killpg(gid, signal.SIGKILL) + first_done = False + elif pid == submission_pid: + if first is None: + first = "submission" + submission_status = status + + # Close the output stream. 
+ validator.stdin.close() + if interaction: + assert team_tee.stdin + team_tee.stdin.close() + + # Possibly already written by the alarm. + if submission_time is None: + submission_time = rusage.ru_utime + rusage.ru_stime + + first_done = False + elif interaction: + if pid == team_tee_pid or pid == val_tee_pid: + pass + else: + assert False else: assert False - - left -= 1 - - stop_kill_handler.set() - - assert submission_time is not None - did_timeout = submission_time > time_limit - aborted = submission_time >= timeout - max_duration = max(max_duration, submission_time) - - # If submission timed out: TLE - # If team exists first with TLE/RTE -> TLE/RTE - # If team exists first nicely -> validator result - # If validator exits first with WA -> WA - # If validator exits first with AC: - # - team TLE/RTE -> TLE/RTE - # - more team output -> WA - # - no more team output -> AC - - if validator_status not in [config.RTV_AC, config.RTV_WA]: - config.n_error += 1 - verdict = Verdict.VALIDATOR_CRASH - elif validator_status == config.RTV_WA and nextpass and nextpass.is_file(): - error("got WRONG_ANSWER but found nextpass.in") - verdict = Verdict.VALIDATOR_CRASH - elif aborted: - verdict = Verdict.TIME_LIMIT_EXCEEDED - elif first == "validator": - # WA has priority because validator reported it first. - if did_timeout: - verdict = Verdict.TIME_LIMIT_EXCEEDED - elif validator_status == config.RTV_WA: - verdict = Verdict.WRONG_ANSWER - elif submission_status != 0: - verdict = Verdict.RUNTIME_ERROR - else: - verdict = Verdict.ACCEPTED - else: - assert first == "submission" - if submission_status != 0: - verdict = Verdict.RUNTIME_ERROR - elif did_timeout: - verdict = Verdict.TIME_LIMIT_EXCEEDED - elif validator_status == config.RTV_WA: - verdict = Verdict.WRONG_ANSWER - else: - verdict = Verdict.ACCEPTED - val_err = None - if validator_error is False: - assert validator.stderr - val_err = _feedback(run, validator.stderr.read()) - team_err = None - if team_error is False: - assert submission.stderr - team_err = submission.stderr.read().decode("utf-8", "replace") - - if verdict == Verdict.TIME_LIMIT_EXCEEDED: - if tle_result is None: - tle_result = ExecResult( - None, - ExecStatus.ACCEPTED, - max_duration, - aborted, - val_err, - team_err, - verdict, - pass_id if run.problem.multi_pass else None, - ) + left -= 1 + + stop_kill_handler.set() + + if old_handler: + signal.signal(signal.SIGINT, old_handler) + + assert submission_time is not None + did_timeout = submission_time > time_limit + aborted = submission_time >= timeout + max_duration = max(max_duration, submission_time) + + # If submission timed out: TLE + # If team exits first with TLE/RTE -> TLE/RTE + # If team exits first nicely -> validator result + # If validator exits first with WA -> WA + # If validator exits first with AC: + # - team TLE/RTE -> TLE/RTE + # - more team output -> WA + # - no more team output -> AC + + if validator_status not in [config.RTV_AC, config.RTV_WA]: + config.n_error += 1 + verdict = Verdict.VALIDATOR_CRASH + elif validator_status == config.RTV_WA and nextpass and nextpass.is_file(): + error("got WRONG_ANSWER but found nextpass.in") + verdict = Verdict.VALIDATOR_CRASH + elif aborted: + verdict = Verdict.TIME_LIMIT_EXCEEDED + elif first == "validator": + # WA has priority because validator reported it first. 
+ if did_timeout: + verdict = Verdict.TIME_LIMIT_EXCEEDED + elif validator_status == config.RTV_WA: + verdict = Verdict.WRONG_ANSWER + elif submission_status != 0: + verdict = Verdict.RUNTIME_ERROR + else: + verdict = Verdict.ACCEPTED else: - tle_result.timeout_expired |= aborted + assert first == "submission" + if submission_status != 0: + verdict = Verdict.RUNTIME_ERROR + elif did_timeout: + verdict = Verdict.TIME_LIMIT_EXCEEDED + elif validator_status == config.RTV_WA: + verdict = Verdict.WRONG_ANSWER + else: + verdict = Verdict.ACCEPTED + + val_err = None + if validator_error is False: + assert validator.stderr + val_err = _feedback(run, validator.stderr.read()) + team_err = None + if team_error is False: + assert submission.stderr + team_err = submission.stderr.read().decode("utf-8", "replace") + + if verdict == Verdict.TIME_LIMIT_EXCEEDED: + if tle_result is None: + tle_result = ExecResult( + None, + ExecStatus.ACCEPTED, + max_duration, + aborted, + val_err, + team_err, + verdict, + pass_id if run.problem.multi_pass else None, + ) + else: + tle_result.timeout_expired |= aborted - if not verdict and not run._continue_with_tle(verdict, aborted): - break + if not verdict and not run._continue_with_tle(verdict, aborted): + break - if not run._prepare_nextpass(nextpass): - break + if not run._prepare_nextpass(nextpass): + break - assert run.problem.limits.validation_passes is not None - if pass_id >= run.problem.limits.validation_passes: - error("exceeded limit of validation_passes") - verdict = Verdict.VALIDATOR_CRASH - break + assert run.problem.limits.validation_passes is not None + if pass_id >= run.problem.limits.validation_passes: + error("exceeded limit of validation_passes") + verdict = Verdict.VALIDATOR_CRASH + break - if interaction: - print("---", file=sys.stderr if interaction is None else interaction_file, flush=True) + if interaction: + print("---", file=interaction_file or sys.stderr, flush=True) - if interaction_file is not None: - interaction_file.close() + run._visualize_output(bar or PrintBar("Visualize interaction")) if tle_result is None: return ExecResult( diff --git a/bin/latex.py b/bin/latex.py index ab3539571..6fd114429 100644 --- a/bin/latex.py +++ b/bin/latex.py @@ -6,13 +6,12 @@ import sys from enum import Enum from pathlib import Path -from typing import Optional +from typing import Optional, TYPE_CHECKING from colorama import Fore, Style import config from contest import contest_yaml, problems_yaml -import problem from util import ( copy_and_substitute, ensure_symlink, @@ -26,20 +25,27 @@ warn, ) +if TYPE_CHECKING: # Prevent circular import: https://stackoverflow.com/a/39757388 + from problem import Problem -class PdfType(str, Enum): - PROBLEM = "problem" - PROBLEM_SLIDE = "problem-slide" - SOLUTION = "solution" +class PdfType(Enum): + PROBLEM = Path("statement") / "problem" + PROBLEM_SLIDE = Path("problem_slide") / "problem-slide" + SOLUTION = Path("solution") / "solution" -def latex_builddir(problem: "problem.Problem", language: str) -> Path: + def path(self, lang: Optional[str] = None, ext: str = ".tex") -> Path: + lang = f".{lang}" if lang is not None else "" + return self.value.with_name(f"{self.value.name}{lang}{ext}") + + +def latex_builddir(problem: "Problem", language: str) -> Path: builddir = problem.tmpdir / "latex" / language builddir.mkdir(parents=True, exist_ok=True) return builddir -def create_samples_file(problem: "problem.Problem", language: str) -> None: +def create_samples_file(problem: "Problem", language: str) -> None: builddir = 
latex_builddir(problem, language) # create the samples.tex file @@ -164,12 +170,23 @@ def flush(): samples_file_path.write_text("".join(samples_data)) +def create_constants_file(problem: "Problem", language: str) -> None: + constant_data: list[str] = [] + for key, item in problem.settings.constants.items(): + constant_data.append(f"\\expandafter\\def\\csname constants_{key}\\endcsname{{{item}}}\n") + + builddir = latex_builddir(problem, language) + constants_file_path = builddir / "constants.tex" + constants_file_path.write_text("".join(constant_data)) + + # Steps needed for both problem and contest compilation. -def prepare_problem(problem: "problem.Problem", language: str): +def prepare_problem(problem: "Problem", language: str): create_samples_file(problem, language) + create_constants_file(problem, language) -def get_tl(problem: "problem.Problem"): +def get_tl(problem: "Problem"): tl = problem.limits.time_limit tl = int(tl) if abs(tl - int(tl)) < 0.0001 else tl @@ -183,7 +200,7 @@ def get_tl(problem: "problem.Problem"): return tl if print_tl else "" -def problem_data(problem: "problem.Problem", language: str): +def problem_data(problem: "Problem", language: str): background = next( ( p["rgb"][1:] @@ -193,12 +210,9 @@ def problem_data(problem: "problem.Problem", language: str): "ffffff", ) # Source: https://github.com/DOMjudge/domjudge/blob/095854650facda41dbb40966e70199840b887e33/webapp/src/Twig/TwigExtension.php#L1056 - foreground = ( - "000000" if sum(int(background[i : i + 2], 16) for i in range(0, 6, 2)) > 450 else "ffffff" - ) - border = "".join( - ("00" + hex(max(0, int(background[i : i + 2], 16) - 64))[2:])[-2:] for i in range(0, 6, 2) - ) + background_rgb = [int(background[i : i + 2], 16) for i in [0, 2, 4]] + foreground = "000000" if sum(background_rgb) > 450 else "ffffff" + border = "".join(f"{max(0, color - 64):02x}" for color in background_rgb) return { "problemlabel": problem.label, @@ -221,18 +235,15 @@ def make_environment() -> dict[str, str]: Path.cwd(), Path.cwd() / "solve_stats", Path.cwd() / "solve_stats" / "activity", + Path.cwd() / "latex", config.TOOLS_ROOT / "latex", + # The default empty element at the end makes sure that the new TEXINPUTS ends with a path separator. + # This is required to make LaTeX look in the default global paths: https://tex.stackexchange.com/a/410353 + env.get("TEXINPUTS", ""), ] - texinputs = "" - for p in latex_paths: - texinputs += str(p) + ";" + texinputs = os.pathsep.join(map(str, latex_paths)) if config.args.verbose >= 2: print(f"export TEXINPUTS='{texinputs}'", file=sys.stderr) - if "TEXINPUTS" in env: - prev = env["TEXINPUTS"] - if len(prev) > 0 and prev[-1] != ";": - prev += ";" - texinputs = prev + texinputs env["TEXINPUTS"] = texinputs return env @@ -350,19 +361,16 @@ def run_latexmk(stdout, stderr): # 1. Copy the latex/problem.tex file to tmpdir//latex//problem.tex, # substituting variables. -# 2. Create tmpdir//latex//samples.tex. +# 2. Create tmpdir//latex//{samples,constants}.tex. # 3. Run latexmk and link the resulting ..pdf into the problem directory. 
-def build_problem_pdf( - problem: "problem.Problem", language: str, build_type=PdfType.PROBLEM, web=False -): +def build_problem_pdf(problem: "Problem", language: str, build_type=PdfType.PROBLEM, web=False): """ Arguments: -- language: str, the two-letter language code appearing the file name, such as problem.en.tex """ - main_file = build_type.value - main_file += "-web.tex" if web else ".tex" + main_file = build_type.path(ext="-web.tex" if web else ".tex").name - bar = PrintBar(f"{main_file[:-3]}{language}.pdf") + bar = PrintBar(f"{main_file[:-4]}.{language}.pdf") bar.log(f"Building PDF for language {language}") prepare_problem(problem, language) @@ -374,42 +382,43 @@ def build_problem_pdf( local_data if local_data.is_file() else config.TOOLS_ROOT / "latex" / main_file, builddir / main_file, problem_data(problem, language), + bar=bar, ) return build_latex_pdf(builddir, builddir / main_file, language, bar, problem.path) -def build_problem_pdfs(problem: "problem.Problem", build_type=PdfType.PROBLEM, web=False): +def build_problem_pdfs(problem: "Problem", build_type=PdfType.PROBLEM, web=False): """Build PDFs for various languages. If list of languages is specified, - (either via config files or --language arguments), build those. Otherwise + (either via config files or --lang arguments), build those. Otherwise build all languages for which there is a statement latex source. """ - if config.args.languages is not None: - for lang in config.args.languages: + if config.args.lang is not None: + for lang in config.args.lang: if lang not in problem.statement_languages: message( f"No statement source for language {lang}", problem.name, color_type=MessageType.FATAL, ) - languages = config.args.languages + languages = config.args.lang else: languages = problem.statement_languages - # For solutions or problem slides, filter for `..tex` files that exist. + # For solutions or problem slides, filter for `..tex` files that exist. if build_type != PdfType.PROBLEM: filtered_languages = [] for lang in languages: - if (problem.path / "problem_statement" / f"{build_type.value}.{lang}.tex").exists(): + if (problem.path / build_type.path(lang)).exists(): filtered_languages.append(lang) else: message( - f"{build_type.value}.{lang}.tex not found", + f"{build_type.path(lang)} not found", problem.name, color_type=MessageType.WARN, ) languages = filtered_languages if config.args.watch and len(languages) > 1: - fatal("--watch does not work with multiple languages. Please use --language") + fatal("--watch does not work with multiple languages. 
Please use --lang") return all([build_problem_pdf(problem, lang, build_type, web) for lang in languages]) @@ -424,7 +433,7 @@ def find_logo() -> Path: def build_contest_pdf( contest: str, - problems: list["problem.Problem"], + problems: list["Problem"], tmpdir: Path, language: str, build_type=PdfType.PROBLEM, @@ -447,13 +456,13 @@ def build_contest_pdf( "subtitle": "", "year": "YEAR", "author": "AUTHOR", - "testsession": "", + "test_session": "", } config_data = contest_yaml() for x in default_config_data: if x not in config_data: config_data[x] = default_config_data[x] - config_data["testsession"] = "\\testsession" if config_data.get("testsession") else "" + config_data["test_session"] = "\\testsession" if config_data.get("test_session") else "" config_data["logofile"] = find_logo().as_posix() local_contest_data = Path("contest_data.tex") @@ -465,6 +474,7 @@ def build_contest_pdf( ), builddir / "contest_data.tex", config_data, + bar=bar, ) problems_data = "" @@ -478,35 +488,37 @@ def build_contest_pdf( elif headertex.exists(): problems_data += f"\\input{{{headertex}}}\n" - local_per_problem_data = Path(f"contest-{build_type.value}.tex") + local_per_problem_data = Path(f"contest-{build_type.path().name}") per_problem_data_tex = ( local_per_problem_data if local_per_problem_data.is_file() - else config.TOOLS_ROOT / "latex" / f"contest-{build_type.value}.tex" + else config.TOOLS_ROOT / "latex" / local_per_problem_data.name ).read_text() for prob in problems: if build_type == PdfType.PROBLEM: prepare_problem(prob, language) else: # i.e. for SOLUTION and PROBLEM_SLIDE - tex_no_lang = prob.path / "problem_statement" / f"{build_type.value}.tex" - tex_with_lang = prob.path / "problem_statement" / f"{build_type.value}.{language}.tex" + create_constants_file(prob, language) + tex_no_lang = prob.path / build_type.path() + tex_with_lang = prob.path / build_type.path(language) if tex_with_lang.is_file(): # All is good pass elif tex_no_lang.is_file(): bar.warn( - f"Rename {build_type.value}.tex to {build_type.value}.{language}.tex", + f"Rename {tex_no_lang.name} to {tex_with_lang.name}", prob.name, ) continue else: - bar.warn(f"{build_type.value}.{language}.tex not found", prob.name) + bar.warn(f"{tex_with_lang.name} not found", prob.name) continue problems_data += substitute( per_problem_data_tex, problem_data(prob, language), + bar=bar, ) if solutions: @@ -518,7 +530,7 @@ def build_contest_pdf( elif footertex.exists(): problems_data += f"\\input{{{footertex}}}\n" - (builddir / f"contest-{build_type.value}s.tex").write_text(problems_data) + (builddir / f"contest-{build_type.path(ext='s.tex').name}").write_text(problems_data) return build_latex_pdf(builddir, Path(main_file), language, bar) @@ -533,8 +545,8 @@ def build_contest_pdfs(contest, problems, tmpdir, lang=None, build_type=PdfType. message( "No statement language present in every problem.", contest, color_type=MessageType.FATAL ) - if config.args.languages is not None: - languages = config.args.languages + if config.args.lang is not None: + languages = config.args.lang for lang in set(languages) - statement_languages: message( f"Unable to build all statements for language {lang}", @@ -545,7 +557,7 @@ def build_contest_pdfs(contest, problems, tmpdir, lang=None, build_type=PdfType. languages = statement_languages if config.args.watch and len(languages) > 1: message( - "--watch does not work with multiple languages. Please use --language", + "--watch does not work with multiple languages. 
Please use --lang", contest, color_type=MessageType.FATAL, ) diff --git a/bin/parallel.py b/bin/parallel.py index 722045771..af265f9a3 100644 --- a/bin/parallel.py +++ b/bin/parallel.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 import heapq import os -import signal import threading from collections.abc import Callable, Sequence from typing import Any, Generic, Literal, Optional, TypeVar @@ -19,7 +18,7 @@ def __init__(self, task: T, priority: int, index: int): self.index = index # Note: heapq uses a min heap, so higher priorities are 'smaller'. - def __lt__(self, other): + def __lt__(self, other: "QueueItem[T]") -> bool: if self.priority != other.priority: # python priority queue is a min heap but larger priority # items should come first => reverse compare @@ -45,33 +44,37 @@ def __init__(self, f: Callable[[T], Any], pin: bool): # mutex to lock parallel access self.mutex = threading.RLock() - def __enter__(self): + def __enter__(self) -> None: self.mutex.__enter__() - def __exit__(self, *args): + def __exit__(self, *args: Any) -> None: self.mutex.__exit__(*args) # Add one task. Higher priority => done first - def put(self, task: T, priority=0): + def put(self, task: T, priority: int = 0) -> None: raise Exception("Abstract method") # By default, do nothing on .join(). This is overridden in ParallelQueue. - def join(self): + def join(self) -> None: return - def done(self): + def done(self) -> None: raise Exception("Abstract method") - def abort(self): + def abort(self) -> None: self.aborted = True + def ensure_alive(self) -> None: + if self.aborted: + raise util.AbortException() + class SequentialQueue(AbstractQueue[T]): def __init__(self, f: Callable[[T], Any], pin: bool): super().__init__(f, pin) # Add one task. Higher priority => done first - def put(self, task: T, priority: int = 0): + def put(self, task: T, priority: int = 0) -> None: # no task will be handled after self.abort() so skip adding if self.aborted: return @@ -80,14 +83,18 @@ def put(self, task: T, priority: int = 0): heapq.heappush(self.tasks, QueueItem(task, priority, self.total_tasks)) # Execute all tasks. 
- def done(self): + def done(self) -> None: if self.pin: cores = list(os.sched_getaffinity(0)) os.sched_setaffinity(0, {cores[0]}) # no task will be handled after self.abort() while self.tasks and not self.aborted: - self.f(heapq.heappop(self.tasks).task) + try: + self.f(heapq.heappop(self.tasks).task) + except Exception as e: + if not self.aborted: + raise e if self.pin: os.sched_setaffinity(0, cores) @@ -125,9 +132,7 @@ def __init__(self, f: Callable[[T], Any], pin: bool, num_threads: int): t.start() self.threads.append(t) - signal.signal(signal.SIGINT, self._interrupt_handler) - - def _worker(self, cores: Literal[False] | list[int] = False): + def _worker(self, cores: Literal[False] | list[int] = False) -> None: if cores is not False: os.sched_setaffinity(0, cores) while True: @@ -153,8 +158,9 @@ def _worker(self, cores: Literal[False] | list[int] = False): current_error = None self.f(task) except Exception as e: - self.abort() - current_error = e + if not self.aborted: + self.abort() + current_error = e with self.mutex: if not self.first_error: @@ -164,20 +170,14 @@ def _worker(self, cores: Literal[False] | list[int] = False): if self.missing == 0: self.all_done.notify_all() - def _interrupt_handler(self, sig, frame): - util.fatal("Running interrupted", force=True) - - def _handle_first_error(self): + def _handle_first_error(self) -> None: if self.first_error is not None: first_error = self.first_error self.first_error = None - # we are the main thread now, so we can handle this - if isinstance(first_error, ChildProcessError): - self._interrupt_handler(None, None) raise first_error # Add one task. Higher priority => done first - def put(self, task: T, priority: int = 0): + def put(self, task: T, priority: int = 0) -> None: with self.mutex: # no task should be added after .done() was called assert not self.finish @@ -189,14 +189,14 @@ def put(self, task: T, priority: int = 0): heapq.heappush(self.tasks, QueueItem(task, priority, self.total_tasks)) self.todo.notify() - def join(self): + def join(self) -> None: # wait for all current task to be completed with self.all_done: self.all_done.wait_for(lambda: self.missing == 0) self._handle_first_error() # Wait for all tasks to be done and stop all threads - def done(self): + def done(self) -> None: self.finish = True # notify all workers with permission to leave main loop @@ -213,7 +213,7 @@ def done(self): # Discard all remaining work in the queue and stop all workers. # Call done() to join the threads. - def abort(self): + def abort(self) -> None: super().abort() with self.mutex: @@ -227,7 +227,7 @@ def abort(self): self.all_done.notify_all() -def new_queue(f: Callable[[T], Any], pin: bool = False): +def new_queue(f: Callable[[T], Any], pin: bool = False) -> AbstractQueue[T]: """ f(task): the function to run on each queue item. 
@@ -242,7 +242,7 @@ def new_queue(f: Callable[[T], Any], pin: bool = False): return SequentialQueue(f, pin) -def run_tasks(f: Callable[[T], Any], tasks: Sequence[T], pin: bool = False): +def run_tasks(f: Callable[[T], Any], tasks: Sequence[T], pin: bool = False) -> None: queue = new_queue(f, pin) for task in tasks: queue.put(task) diff --git a/bin/problem.py b/bin/problem.py index bd4a453a3..60ddcbe99 100644 --- a/bin/problem.py +++ b/bin/problem.py @@ -1,11 +1,12 @@ +import datetime import re -import shlex +import shutil import sys import threading from collections.abc import Callable, Sequence from pathlib import Path -from typing import Any, Final, Literal, Optional, TYPE_CHECKING +from typing import Any, Final, Literal, Optional, overload, TYPE_CHECKING if TYPE_CHECKING: # Prevent circular import: https://stackoverflow.com/a/39757388 from program import Program @@ -19,27 +20,11 @@ import validate import validator_tests import verdicts +import visualize from util import * from colorama import Fore, Style -# Parse validation mode (only for legacy problem format version) -def parse_legacy_validation(mode: str) -> set[str]: - if mode == "default": - return {mode} - else: - ok = True - parsed = set() - for part in mode.split(): - if part in ["custom", "interactive", "multi-pass"] and part not in parsed: - parsed.add(part) - else: - ok = False - if "custom" not in parsed or not ok: - fatal(f"problem.yaml: unrecognized validation mode {mode}.") - return parsed - - # The parse_* functions will remove (.pop()) keys from the yaml data during parsing. # We will warn for any unknown keys that remain after this process. def check_unknown_keys(yaml_data: dict[str, Any], sub_key: Optional[str] = None): @@ -49,18 +34,21 @@ def check_unknown_keys(yaml_data: dict[str, Any], sub_key: Optional[str] = None) class Person: - def __init__(self, name: str): - match = re.match("(.*)<(.*)>", name) - self.name: str = (match[1] if match else name).strip() - self.email: Optional[str] = match[2].strip() if match else None + def __init__(self, yaml_data: str | dict[str, Any]): + if isinstance(yaml_data, dict): + self.name: str = parse_setting(yaml_data, "name", "") + self.email: Optional[str] = parse_optional_setting(yaml_data, "email", str) + self.kattis: Optional[str] = parse_optional_setting(yaml_data, "kattis", str) + self.orcid: Optional[str] = parse_optional_setting(yaml_data, "orcid", str) + else: + match = re.match("(.*)<(.*)>", yaml_data) + self.name = (match[1] if match else yaml_data).strip() + self.email = match[2].strip() if match else None + self.kattis = self.orcid = None class ProblemCredits: - def __init__( - self, - yaml_data: dict[str, Any], - problem_settings: "ProblemSettings", - ): + def __init__(self, yaml_data: dict[str, Any]): self.authors: list[Person] = [] self.contributors: list[Person] = [] self.testers: list[Person] = [] @@ -68,42 +56,43 @@ def __init__( self.packagers: list[Person] = [] self.acknowledgements: list[Person] = [] - # If problem.yaml uses the legacy version, do not support the new `credits` key. - # If problem.yaml uses 2023-07-draft, prefer `credit`, but also support `author` and warn for it. - legacy_author = parse_optional_setting(yaml_data, "author", str) - if problem_settings.is_legacy(): - if legacy_author: - self.authors = [Person(a) for a in legacy_author.replace("and", ",").split(",")] - else: - if legacy_author is not None: - warn( - "problem.yaml: author is removed in 2023-07-draft, please use credits.authors. SKIPPED." 
- ) - if "credits" not in yaml_data: - return - if isinstance(yaml_data["credits"], str): - self.authors = [Person(parse_setting(yaml_data, "credits", ""))] - return - - credits = parse_setting(yaml_data, "credits", dict[str, Any]()) - self.authors = [Person(s) for s in parse_optional_list_setting(credits, "authors", str)] - self.contributors = [ - Person(s) for s in parse_optional_list_setting(credits, "contributors", str) - ] - self.translators = parse_setting(credits, "translators", {}) - for lang in list(self.translators.keys()): - self.translators[lang] = [ - Person(s) for s in parse_optional_list_setting(self.translators, lang, str) - ] - self.testers = [Person(s) for s in parse_optional_list_setting(credits, "testers", str)] - self.packagers = [ - Person(s) for s in parse_optional_list_setting(credits, "packagers", str) - ] - self.acknowledgements = [ - Person(s) for s in parse_optional_list_setting(credits, "acknowledgements", str) - ] - - check_unknown_keys(credits, "credits") + parse_deprecated_setting(yaml_data, "author", "credits.authors") + if "credits" not in yaml_data: + return + if isinstance(yaml_data["credits"], str): + self.authors = [Person(parse_setting(yaml_data, "credits", ""))] + return + + credits = parse_setting(yaml_data, "credits", dict[str, Any]()) + self.authors = self.parse_optional_list_persons(credits, "authors") + self.contributors = self.parse_optional_list_persons(credits, "contributors") + self.translators = parse_setting(credits, "translators", {}) + for lang in list(self.translators.keys()): + self.translators[lang] = self.parse_optional_list_persons(self.translators, lang) + self.testers = self.parse_optional_list_persons(credits, "testers") + self.packagers = self.parse_optional_list_persons(credits, "packagers") + self.acknowledgements = self.parse_optional_list_persons(credits, "acknowledgements") + + check_unknown_keys(credits, "credits") + + # Based on parse_optional_list_setting: the type checker does not like type unions like `str | dict`. + @staticmethod + def parse_optional_list_persons(yaml_data: dict[str, Any], key: str) -> list[Person]: + if key in yaml_data: + value = yaml_data.pop(key) + if isinstance(value, str | dict): + return [Person(value)] + if isinstance(value, list): + if not all(isinstance(v, str | dict) for v in value): + warn( + f"some values for key '{key}' in problem.yaml do not have type str or dict. SKIPPED." + ) + return [] + if not value: + warn(f"value for '{key}' in problem.yaml should not be an empty list.") + return list(map(Person, value)) + warn(f"incompatible value for key '{key}' in problem.yaml. SKIPPED.") + return [] class ProblemSource: @@ -119,46 +108,37 @@ class ProblemSources(list[ProblemSource]): def __init__( self, yaml_data: dict[str, Any], - problem_settings: "ProblemSettings", ): - # If problem.yaml uses the legacy version, do not support the new type of the `source` key. - # If problem.yaml uses 2023-07-draft, prefer `source`, but also support `source_url` and warn for it. - legacy_source_url = parse_optional_setting(yaml_data, "source_url", str) - if problem_settings.is_legacy(): - source_name = parse_setting(yaml_data, "source", "") - if legacy_source_url: - self.append(ProblemSource(source_name, legacy_source_url)) - else: - if legacy_source_url is not None: - warn( - "problem.yaml: source_url is removed in 2023-07-draft, please use source.url. SKIPPED." 
- ) - if "source" not in yaml_data: - return - if isinstance(yaml_data["source"], str): - self.append(ProblemSource(parse_setting(yaml_data, "source", ""))) - return - if isinstance(yaml_data["source"], dict): - source = parse_setting(yaml_data, "source", dict[str, str]()) - self.append( - ProblemSource( - parse_setting(source, "name", ""), - parse_optional_setting(source, "url", str), - ) - ) - return - if isinstance(yaml_data["source"], list): - sources = parse_setting(yaml_data, "source", list[dict[str, str]]()) - for raw_source in sources: - source = parse_setting(raw_source, "source", dict[str, str]()) - self.append( - ProblemSource( - parse_setting(source, "name", ""), - parse_optional_setting(source, "url", str), - ) - ) - return - warn("problem.yaml key 'source' does not have the correct type") + def source_from_dict(source_dict: dict[str, str]) -> ProblemSource: + name = parse_setting(source_dict, "name", "") + if not name: + warn("problem.yaml: 'name' is required in source") + return ProblemSource( + name, + parse_optional_setting(source_dict, "url", str), + ) + + parse_deprecated_setting(yaml_data, "source_url", "source.url") + if "source" not in yaml_data: + return + if isinstance(yaml_data["source"], str): + self.append(ProblemSource(parse_setting(yaml_data, "source", ""))) + return + if isinstance(yaml_data["source"], dict): + source = parse_setting(yaml_data, "source", dict[str, str]()) + self.append(source_from_dict(source)) + return + if isinstance(yaml_data["source"], list): + sources = parse_setting(yaml_data, "source", list[dict[str, str]]()) + for i, source in enumerate(sources): + if isinstance(source, str): + self.append(ProblemSource(source)) + elif isinstance(source, dict): + self.append(source_from_dict(source)) + else: + warn(f"problem.yaml key 'source[{i}]' does not have the correct type") + return + warn("problem.yaml key 'source' does not have the correct type") class ProblemLimits: @@ -174,83 +154,63 @@ def __init__( # (defaults from https://icpc.io/problem-package-format/spec/2023-07-draft.html#limits) time_multipliers = parse_setting(yaml_data, "time_multipliers", dict[str, Any]()) - # If problem.yaml uses the legacy version, do not support the new keys. - # If problem.yaml uses 2023-07-draft, prefer the new keys, but also support and warn for the old keys. 
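Aside: the `source` key accepted by the ProblemSources parsing above can be a plain string, a single name/url map, or a list mixing both, and everything normalizes to (name, url) pairs. A standalone sketch of that normalization (illustrative only, not part of this patch; normalize_source is a hypothetical helper):

from typing import Any, Optional

def normalize_source(value: Any) -> list[tuple[str, Optional[str]]]:
    # string -> a single source without url
    if isinstance(value, str):
        return [(value, None)]
    # map -> a single source, url optional
    if isinstance(value, dict):
        return [(value.get("name", ""), value.get("url"))]
    # list -> each entry may again be a string or a map
    if isinstance(value, list):
        return [pair for item in value for pair in normalize_source(item)]
    return []

assert normalize_source("Example Contest") == [("Example Contest", None)]
assert normalize_source({"name": "Example Contest", "url": "https://example.org"}) == [
    ("Example Contest", "https://example.org")
]
assert normalize_source(["Example Contest", {"name": "Another Contest"}]) == [
    ("Example Contest", None),
    ("Another Contest", None),
]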
- legacy_ac_to_time_limit = parse_optional_setting(yaml_data, "time_multiplier", float) - if problem_settings.is_legacy(): - self.ac_to_time_limit = legacy_ac_to_time_limit or 5.0 - else: - if legacy_ac_to_time_limit is not None: - warn( - "problem.yaml: limits.time_multiplier is removed in 2023-07-draft, please use limits.time_multipliers.ac_to_time_limit" - ) - self.ac_to_time_limit = parse_setting( - time_multipliers, "ac_to_time_limit", legacy_ac_to_time_limit or 2.0 - ) - - legacy_time_limit_to_tle = parse_optional_setting(yaml_data, "time_safety_margin", float) - if problem_settings.is_legacy(): - self.time_limit_to_tle = legacy_time_limit_to_tle or 2.0 - else: - if legacy_time_limit_to_tle is not None: - warn( - "problem.yaml: limits.time_safety_margin is removed in 2023-07-draft, please use limits.time_multipliers.time_limit_to_tle" - ) - self.time_limit_to_tle = parse_setting( - time_multipliers, "time_limit_to_tle", legacy_time_limit_to_tle or 1.5 - ) + parse_deprecated_setting(yaml_data, "time_multiplier", "ac_to_time_limit") + self.ac_to_time_limit = parse_setting(time_multipliers, "ac_to_time_limit", 2.0, ">= 1") + parse_deprecated_setting(yaml_data, "time_safety_margin", "time_limit_to_tle") + self.time_limit_to_tle = parse_setting(time_multipliers, "time_limit_to_tle", 1.5, ">= 1") check_unknown_keys(time_multipliers, "limits.time_multipliers") - # time_limit is required, but parse as optional to more easily handle the legacy_time_limit. - time_limit = parse_optional_setting(yaml_data, "time_limit", float) # in seconds - self.time_resolution: float = parse_setting(yaml_data, "time_resolution", 1.0) - self.memory: int = parse_setting(yaml_data, "memory", 2048) # in MiB - self.output: int = parse_setting(yaml_data, "output", 8) # in MiB - self.code: int = parse_setting(yaml_data, "code", 128) # in KiB - self.compilation_time: int = parse_setting(yaml_data, "compilation_time", 60) # in seconds + self.time_limit_is_default: bool = "time_limit" not in yaml_data + self.time_limit: float = parse_setting(yaml_data, "time_limit", 1.0, "> 0") # in seconds + self.time_resolution: float = parse_setting(yaml_data, "time_resolution", 1.0, "> 0") + self.memory: int = parse_setting(yaml_data, "memory", 2048, "> 0") # in MiB + self.output: int = parse_setting(yaml_data, "output", 8, "> 0") # in MiB + self.code: int = parse_setting(yaml_data, "code", 128, "> 0") # in KiB + self.compilation_time: int = parse_setting( + yaml_data, "compilation_time", 60, "> 0" + ) # in seconds self.compilation_memory: int = parse_setting( - yaml_data, "compilation_memory", 2048 + yaml_data, "compilation_memory", 2048, "> 0" ) # in MiB - self.validation_time: int = parse_setting(yaml_data, "validation_time", 60) # in seconds - self.validation_memory: int = parse_setting(yaml_data, "validation_memory", 2048) # in MiB - self.validation_output: int = parse_setting(yaml_data, "validation_output", 8) # in MiB - self.validation_passes: Optional[int] = parse_optional_setting( - yaml_data, "validation_passes", int - ) + self.validation_time: int = parse_setting( + yaml_data, "validation_time", 60, "> 0" + ) # in seconds + self.validation_memory: int = parse_setting( + yaml_data, "validation_memory", 2048, "> 0" + ) # in MiB + self.validation_output: int = parse_setting( + yaml_data, "validation_output", 8, "> 0" + ) # in MiB + if problem_settings.multi_pass: + self.validation_passes: Optional[int] = parse_setting( + yaml_data, "validation_passes", 2, ">= 2" + ) + elif "validation_passes" in yaml_data: + 
yaml_data.pop("validation_passes") + warn("limit: validation_passes is only used for multi-pass problems. SKIPPED.") # BAPCtools extensions: - self.generator_time: int = parse_setting(yaml_data, "generator_time", 60) # in seconds - self.visualizer_time: int = parse_setting(yaml_data, "visualizer_time", 60) # in seconds - - # Try to read deprecated ways of setting the time limit. - def _get_legacy_time_limit(): - timelimit_path = problem.path / ".timelimit" - if timelimit_path.is_file(): - if not problem_settings.is_legacy(): - log("A .timelimit file is DEPRECATED. Use limits.time_limit instead.") - return float(timelimit_path.read_text()) - - domjudge_path = problem.path / "domjudge-problem.ini" - if domjudge_path.is_file(): - log("domjudge-problem.ini is DEPRECATED. Use limits.time_limit instead.") - for line in domjudge_path.read_text().splitlines(): - key, var = map(str.strip, line.strip().split("=")) - if (var[0] == '"' or var[0] == "'") and (var[-1] == '"' or var[-1] == "'"): - var = var[1:-1] - if key == "timelimit": - return float(var) - - # If limits.time_limit does not exist, attempt to use legacy_time_limit instead. - legacy_time_limit = _get_legacy_time_limit() - self.time_limit: float = time_limit or legacy_time_limit or 1.0 - self.time_limit_is_default: bool = time_limit is None and legacy_time_limit is None + self.generator_time: int = parse_setting( + yaml_data, "generator_time", 60, "> 0" + ) # in seconds + self.visualizer_time: int = parse_setting( + yaml_data, "visualizer_time", 60, "> 0" + ) # in seconds + + # warn for deprecated timelimit files + if (problem.path / ".timelimit").is_file(): + warn("A .timelimit file is DEPRECATED. Use limits.time_limit instead.") + if (problem.path / "domjudge-problem.ini").is_file(): + warn( + "domjudge-problem.ini is DEPRECATED. Use limits.time_limit if you want to set a timelimit." + ) check_unknown_keys(yaml_data, "limits") # Override limmits by command line arguments. self.time_limit = config.args.time_limit or self.time_limit - self.timeout = int(config.args.timeout or self.time_limit_to_tle * self.time_limit + 1) + self.timeout: int = int(config.args.timeout or self.time_limit_to_tle * self.time_limit + 1) if config.args.timeout: self.validation_time = self.generator_time = self.visualizer_time = config.args.timeout if config.args.memory: @@ -273,44 +233,35 @@ def __init__( self.problem_format_version: str = parse_setting( yaml_data, "problem_format_version", "legacy-icpc" ) - if not self.is_legacy() and self.problem_format_version != "2023-07-draft": - fatal(f"problem_format_version {self.problem_format_version} not supported") - if self.is_legacy(): - mode = parse_legacy_validation(parse_setting(yaml_data, "validation", "default")) - else: - if "validation" in yaml_data: - warn( - "problem.yaml: 'validation' is removed in 2023-07-draft, please use 'type' instead. SKIPPED." 
- ) - yaml_data.pop("validation") - mode = set( - ["pass-fail"] - if "type" not in yaml_data - else parse_setting(yaml_data, "type", "pass-fail").split() - if isinstance(yaml_data["type"], str) - else parse_optional_list_setting(yaml_data, "type", str) - if isinstance(yaml_data["type"], list) - else [fatal("problem.yaml: 'type' must be a string or a sequence")] + if self.problem_format_version.startswith("legacy"): + fatal("legacy is no longer supported, try running 'bt upgrade'") + elif self.problem_format_version != config.SPEC_VERSION: + fatal(f"unrecognized problem_format_version: {self.problem_format_version}") + + parse_deprecated_setting(yaml_data, "validation", "type") + mode = set( + ["pass-fail"] + if "type" not in yaml_data + else parse_setting(yaml_data, "type", "pass-fail").split() + if isinstance(yaml_data["type"], str) + else parse_optional_list_setting(yaml_data, "type", str) + if isinstance(yaml_data["type"], list) + else [fatal("problem.yaml: 'type' must be a string or a sequence")] + ) + unrecognized_type = mode - {"pass-fail", "interactive", "multi-pass"} + if unrecognized_type: + fatal( + f"""problem.yaml: unrecognized value{ + "" if len(unrecognized_type) == 1 else "s" + } for 'type': {" ".join(sorted(unrecognized_type))}""" ) - unrecognized_type = mode - {"pass-fail", "interactive", "multi-pass"} - if unrecognized_type: - fatal( - f"""problem.yaml: unrecognized value{ - "" if len(unrecognized_type) == 1 else "s" - } for 'type': {" ".join(sorted(unrecognized_type))}""" - ) self.interactive: bool = "interactive" in mode self.multi_pass: bool = "multi-pass" in mode self.custom_output: bool = ( self.interactive or self.multi_pass - or ( - "custom" in mode - if self.is_legacy() - # TODO #424: output_validator should be singular, but DOMjudge does not support this yet, so this should be fixed during export. - else (problem.path / "output_validators").exists() - ) + or (problem.path / validate.OutputValidator.source_dir).is_dir() ) self.name: dict[str, str] = parse_setting(yaml_data, "name", {"en": ""}) @@ -318,37 +269,50 @@ def __init__( self.name[lang] = parse_setting(self.name, lang, "") self.uuid: str = parse_setting(yaml_data, "uuid", "") self.version: str = parse_setting(yaml_data, "version", "") - self.credits: ProblemCredits = ProblemCredits(yaml_data, self) - self.source: ProblemSources = ProblemSources(yaml_data, self) + self.credits: ProblemCredits = ProblemCredits(yaml_data) + self.source: ProblemSources = ProblemSources(yaml_data) self.license: str = parse_setting(yaml_data, "license", "unknown") - self.rights_owner: str = parse_setting(yaml_data, "rights_owner", "") + self.rights_owner: Optional[str] = parse_optional_setting(yaml_data, "rights_owner", str) # Not implemented in BAPCtools. Should be a date, but we don't do anything with this anyway. - self.embargo_until: str = parse_setting(yaml_data, "embargo-until", "") + self.embargo_until: Optional[datetime.date] = parse_optional_setting( + yaml_data, + "embargo_until", + # Note that datetime.datetime is also valid, as subclass of datetime.date + datetime.date, + ) self.limits = ProblemLimits(parse_setting(yaml_data, "limits", {}), problem, self) - # If problem.yaml uses 2023-07-draft, disallow `validator_flags`. - if self.is_legacy(): - if "validator_flags" in yaml_data and isinstance(yaml_data["validator_flags"], str): - yaml_data["validator_flags"] = shlex.split(yaml_data["validator_flags"]) - # This field should not be used anywhere except the default result of Problem.get_testdata_yaml(). 
- self._validator_flags: list[str] = parse_setting(yaml_data, "validator_flags", []) - else: - self._validator_flags = [] - if "validator_flags" in yaml_data: - warn( - "problem.yaml: 'validator_flags' is removed in 2023-07-draft, please use 'output_validator_args' in 'testdata.yaml' instead. SKIPPED." - ) - yaml_data.pop("validator_flags") + parse_deprecated_setting( + yaml_data, + "validator_flags", + f"{validate.OutputValidator.args_key}' in 'test_group.yaml", + ) - self.keywords: str = parse_setting(yaml_data, "keywords", "") - # Not implemented in BAPCtools. We always test all languges in langauges.yaml. + self.keywords: list[str] = parse_optional_list_setting(yaml_data, "keywords", str) + # Not implemented in BAPCtools. We always test all languages in languages.yaml. self.languages: list[str] = parse_optional_list_setting(yaml_data, "languages", str) - # Not yet implemented, pending https://github.com/Kattis/problem-package-format/issues/344 - self.constants: dict[str, Any] = parse_setting(yaml_data, "constants", {}) + # Not implemented in BAPCtools + self.allow_file_writing: bool = parse_setting(yaml_data, "allow_file_writing", False) + + constants: dict[str, Any] = parse_setting(yaml_data, "constants", {}) + self.constants: dict[str, str] = {} + for key, value in constants.items(): + if not isinstance(key, str) or not config.COMPILED_CONSTANT_NAME_REGEX.fullmatch(key): + warn(f"invalid constant name: {key}. SKIPPED.") + elif not isinstance(value, (str, int, float)): + warn(f"invalid constant type for: {key}. SKIPPED.") + else: + self.constants[key] = str(value) # BAPCtools extensions: self.verified: Optional[str] = parse_optional_setting(yaml_data, "verified", str) self.comment: Optional[str] = parse_optional_setting(yaml_data, "comment", str) + self.ans_is_output: bool = parse_setting( + yaml_data, "ans_is_output", not self.interactive and not self.multi_pass + ) + if (self.interactive or self.multi_pass) and self.ans_is_output: + warn(f"ans_is_output: True makes no sense for {self.type_name()} problem. IGNORED.") + self.ans_is_output = False check_unknown_keys(yaml_data) @@ -359,20 +323,20 @@ def __init__( warn(f"invalid license: {self.license}") self.license = "unknown" - # Check that limits.validation_passes exists if and only if the problem is multi-pass - has_validation_passes = self.limits.validation_passes is not None - if self.multi_pass and not has_validation_passes: - self.limits.validation_passes = 2 - if not self.multi_pass and has_validation_passes: - warn("limit: validation_passes is only used for multi_pass problems. SKIPPED.") - - def is_legacy(self): - return self.problem_format_version.startswith("legacy") + def type_name(self) -> str: + parts: list[str] = [] + if self.interactive: + parts.append("interactive") + if self.multi_pass: + parts.append("multi_pass") + if not parts: + parts.append("pass-fail") + return " ".join(parts) # A problem. 
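Aside: the `constants` parsing above only keeps keys that match the constant-name pattern and stores all values as strings. A standalone sketch of how such {{name}} placeholders could then be substituted into text (illustrative only, not part of this patch; substitute_constants is a hypothetical helper mirroring the {{identifier}} pattern):

import re

NAME = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*")
PLACEHOLDER = re.compile(r"\{\{([a-zA-Z_][a-zA-Z0-9_]*)\}\}")

def substitute_constants(text: str, constants: dict[str, str]) -> str:
    # reject invalid names up front, mirroring the fullmatch check above
    for key in constants:
        if not NAME.fullmatch(key):
            raise ValueError(f"invalid constant name: {key}")

    # every {{name}} occurrence must refer to a known constant
    def repl(match: re.Match[str]) -> str:
        name = match.group(1)
        if name not in constants:
            raise KeyError(f"unknown constant: {name}")
        return constants[name]

    return PLACEHOLDER.sub(repl, text)

assert substitute_constants("1 <= n <= {{max_n}}", {"max_n": "100000"}) == "1 <= n <= 100000"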
class Problem: - _SHORTNAME_REGEX_STRING: Final[str] = "^[a-z0-9]+$" + _SHORTNAME_REGEX_STRING: Final[str] = "[a-z0-9]{2,255}" _SHORTNAME_REGEX: Final[re.Pattern[str]] = re.compile(_SHORTNAME_REGEX_STRING) def __init__(self, path: Path, tmpdir: Path, label: Optional[str] = None): @@ -394,19 +358,22 @@ def __init__(self, path: Path, tmpdir: Path, label: Optional[str] = None): tuple[type[validate.AnyValidator], bool], list[validate.AnyValidator] ]() self._validators_warn_cache = set[tuple[type[validate.AnyValidator], bool]]() + self._visualizer_cache = dict[ + type[visualize.AnyVisualizer], Optional[visualize.AnyVisualizer] + ]() self._programs = dict[Path, "Program"]() self._program_callbacks = dict[Path, list[Callable[["Program"], None]]]() # Dictionary from path to parsed file contents. - # TODO #102: Add type for testdata.yaml (typed Namespace?) - self._testdata_yamls = dict[Path, dict[str, Any]]() - self._testdata_lock = threading.Lock() + # TODO #102: Add type for test_group.yaml (typed Namespace?) + self._test_case_yamls = dict[Path, dict[str, Any]]() + self._test_group_lock = threading.Lock() # The label for the problem: A, B, A1, A2, X, ... self.label = label # TODO: transform this into nice warnings assert path.is_dir() - if not Problem._SHORTNAME_REGEX.match(self.name): + if not Problem._SHORTNAME_REGEX.fullmatch(self.name): warn( f"Problem has a bad shortname: {self.name} does not match {self._SHORTNAME_REGEX_STRING}" ) @@ -425,30 +392,31 @@ def _determine_statement_languages(self): """ yamllangs = set(self.settings.name) texlangs = set( - path.suffixes[0][1:] for path in glob(self.path, "problem_statement/problem.*.tex") + path.suffixes[0][1:] for path in glob(self.path, str(latex.PdfType.PROBLEM.path("*"))) ) for lang in texlangs - yamllangs: error( - f"{self.name}: Found problem.{lang}.tex, but no corresponding name in problem.yaml." + f"{self.name}: Found {latex.PdfType.PROBLEM.path(lang).name}, but no corresponding name in problem.yaml." ) for lang in yamllangs - texlangs: error( - f"{self.name}: Found name for language {lang} in problem.yaml, but not problem_statement/problem.{lang}.tex." + f"{self.name}: Found name for language {lang} in problem.yaml, but not {latex.PdfType.PROBLEM.path(lang)}." ) # Check that names in problem.yaml and \problemname{} in problem.*.tex agree: for lang in texlangs & yamllangs: unnormalised_yamlname = self.settings.name[lang] yamlname = " ".join(unnormalised_yamlname.split()) - with open(self.path / "problem_statement" / f"problem.{lang}.tex") as texfile: + texpath = self.path / latex.PdfType.PROBLEM.path(lang) + with texpath.open() as texfile: match texname := latex.get_argument_for_command(texfile, "problemname"): case None: - error(rf"No \problemname found in problem.{lang}.tex") + error(rf"No \problemname found in {texpath.name}") continue case "": continue case r"\problemyamlname": warn( - rf"Prefer using \problemname{{}} instead of \problemname{{\problemyamlname}} in problem.{lang}.tex" + rf"Prefer using \problemname{{}} instead of \problemname{{\problemyamlname}} in {texpath.name}" ) continue case s if "\\" in s or "_" in s or "^" in s: @@ -457,7 +425,7 @@ def _determine_statement_languages(self): continue case s if s != yamlname: warn( - f"Problem titles in problem.{lang}.tex ({texname})" + f"Problem titles in {texpath.name} ({texname})" + f" and problem.yaml ({yamlname}) differ;" + r" consider using \problemname{}." 
) @@ -491,147 +459,167 @@ def _read_settings(self): self.multi_pass: bool = self.settings.multi_pass self.custom_output: bool = self.settings.custom_output - # TODO #102 move to TestData class - def _parse_testdata_yaml(p, path, bar): + # TODO #102 move to a new TestGroup class + def _parse_test_case_and_groups_yaml(p, path: Path, bar: BAR_TYPE): assert path.is_relative_to(p.path / "data") - for dir in [path] + list(path.parents): + for f in [path] + list(path.parents): # Do not go above the data directory. - if dir == p.path: + if f == p.path: return - f = dir / "testdata.yaml" - if not f.is_file() or f in p._testdata_yamls: - continue - with p._testdata_lock: - if f not in p._testdata_yamls: - p._testdata_yamls[f] = flags = read_yaml(f, plain=True) - - if p.settings.is_legacy(): - # For legacy problems, support both _flags and _args, but move to _args. - if ( - "output_validator_flags" in flags - and "output_validator_args" not in flags + if f.is_dir(): + f = f / "test_group.yaml" + with p._test_group_lock: + if not f.is_file() or f in p._test_case_yamls: + continue + raw = substitute( + f.read_text(), + p.settings.constants, + pattern=config.CONSTANT_SUBSTITUTE_REGEX, + ) + p._test_case_yamls[f] = flags = parse_yaml(raw, path=f, plain=True) + + parse_deprecated_setting( + flags, "output_validator_flags", validate.OutputValidator.args_key + ) + parse_deprecated_setting( + flags, "input_validator_flags", validate.InputValidator.args_key + ) + + # Use variable kwargs so the type checker does not complain when passing them to a PrintBar (nothing happens in that case anyway) + bar_kwargs = {"resume": True, "print_item": False} + + # Verify test_group.yaml + for k in flags: + match k: + case ( + validate.OutputValidator.args_key + | validate.AnswerValidator.args_key + | visualize.InputVisualizer.args_key + | visualize.OutputVisualizer.args_key ): - flags["output_validator_args"] = flags.pop("output_validator_flags") - if "input_validator_flags" in flags and "input_validator_args" not in flags: - flags["input_validator_args"] = flags.pop("input_validator_flags") - else: - # For 2023-07-draft problems, skip the old name and warn to use the new one. - if "input_validator_flags" in flags: - bar.warn( - "input_validator_flags is removed in 2023-07-draft, use ..._args instead. SKIPPED." - ) - if "output_validator_flags" in flags: + if not isinstance(flags[k], list): + bar.error( + f"{k} must be a list of strings", + None, + **bar_kwargs, + ) + case validate.InputValidator.args_key: + if not isinstance(flags[k], (list, dict)): + bar.error( + f"{k} must be list or map", + None, + **bar_kwargs, + ) + if isinstance(flags[k], dict): + input_validator_names = set( + val.name for val in p.validators(validate.InputValidator) + ) + for name in set(flags[k]) - input_validator_names: + bar.warn( + f"Unknown input validator {name}; expected {input_validator_names}", + None, + **bar_kwargs, + ) + case "description" | "hint": + pass # We don't do anything with hint or description in BAPCtools, but no need to warn about this + case "args" | "full_feedback" | "scoring" | "static_validation": bar.warn( - "output_validator_flags is removed in 2023-07-draft, use ..._args instead. SKIPPED." 
+ f"{k} in test_group.yaml not implemented in BAPCtools", + None, + **bar_kwargs, ) + case _: + path = f.relative_to(p.path / "data") + bar.warn(f'Unknown key "{k}" in {path}', None, **bar_kwargs) - # Verify testdata.yaml - for k in flags: - match k: - case "output_validator_args": - if not isinstance(flags[k], str): - bar.error(f"{k} must be string", resume=True, print_item=False) - case "input_validator_args": - if not isinstance(flags[k], (str, dict)): - bar.error( - f"{k} must be string or map", - resume=True, - print_item=False, - ) - if isinstance(flags[k], dict): - input_validator_names = set( - val.name for val in p.validators(validate.InputValidator) - ) - for name in set(flags[k]) - input_validator_names: - bar.warn( - f"Unknown input validator {name}; expected {input_validator_names}", - print_item=False, - ) - case ( - "args" - | "description" - | "full_feedback" - | "hint" - | "scoring" - | "static_validation" - ): - bar.warn( - f"{k} in testdata.yaml not implemented in BAPCtools", - print_item=False, - ) - case _: - path = f.relative_to(p.path / "data") - bar.warn(f'Unknown key "{k}" in {path}', print_item=False) - # Do not go above the data directory. - if dir == p.path / "data": - break - - def get_testdata_yaml( + def get_test_case_yaml( p, path: Path, - key: Literal["input_validator_args"] | Literal["output_validator_args"], - bar: ProgressBar | PrintBar, + key: str, + bar: BAR_TYPE, name: Optional[str] = None, ) -> list[str]: """ - Find the testdata flags applying at the given path for the given key. - If necessary, walk up from `path` looking for the first testdata.yaml file that applies, + Find the value of the given test_group.yaml key applying at the given path. + If necessary, walk up from `path` looking for the first test_group.yaml file that applies. Side effects: parses and caches the file. Arguments --------- path: absolute path (a file or a directory) - key: The testdata.yaml key to look for, either of 'input_validator_args', 'output_validator_args', or 'grading'. - TODO: 'grading' is not yet implemented. + key: The test_group.yaml key to look for (TODO: 'grading' is not yet implemented) name: If key == 'input_validator_args', optionally the name of the input validator. Returns: -------- - A list of string arguments, which is empty if no testdata.yaml is found. + A list of string arguments, which is empty if no test_group.yaml is found. 
TODO: when 'grading' is supported, it also can return dict """ - if key not in ["input_validator_args", "output_validator_args"]: + known_args_keys = [ + validate.InputValidator.args_key, + validate.OutputValidator.args_key, + validate.AnswerValidator.args_key, + visualize.InputVisualizer.args_key, + visualize.OutputVisualizer.args_key, + ] + if key not in known_args_keys: raise NotImplementedError(key) - if key != "input_validator_args" and name is not None: + if key != validate.InputValidator.args_key and name is not None: raise ValueError( f"Only input validators support flags by validator name, got {key} and {name}" ) - # parse and cache testdata.yaml - p._parse_testdata_yaml(path, bar) - - # For legacy problems, default to validator_flags from problem.yaml - default_result = [] - if p.settings.is_legacy() and p.settings._validator_flags: - default_result = p.settings._validator_flags + # parse and cache .yaml and test_group.yaml + path = path.with_suffix(".yaml") + p._parse_test_case_and_groups_yaml(path, bar) # extract the flags - for dir in [path] + list(path.parents): + for f in [path] + list(path.parents): # Do not go above the data directory. - if dir == p.path: - return default_result + if f == p.path: + return [] - f = dir / "testdata.yaml" - if f not in p._testdata_yamls: + if f.suffix != ".yaml": + f = f / "test_group.yaml" + if f not in p._test_case_yamls: continue - flags = p._testdata_yamls[f] + flags = p._test_case_yamls[f] if key in flags: - if key == "output_validator_args": - if not isinstance(flags[key], str): - bar.error("ouput_validator_args must be string") - return flags[key].split() - - if key == "input_validator_args": - if not isinstance(flags[key], (str, dict)): - bar.error("input_validator_args must be string or map") - if isinstance(flags[key], str): - return flags[key].split() - elif name in flags[key]: - return flags[key][name].split() - - return default_result + args = flags[key] + if key == validate.InputValidator.args_key: + if not isinstance(args, (list, dict)): + bar.error(f"{key} must be list of strings or map of lists") + return [] + if isinstance(args, list): + if any(not isinstance(arg, str) for arg in args): + bar.error(f"{key} must be list of strings or map of lists") + return [] + return args + elif name in args: + if not isinstance(args[name], list) or any( + not isinstance(arg, str) for arg in args[name] + ): + bar.error(f"{key} must be list of strings or map of lists") + return [] + return args[name] + elif key in known_args_keys: + if not isinstance(args, list) or any(not isinstance(arg, str) for arg in args): + bar.error(f"{key} must be a list of strings") + return [] + return args + + return [] + + # Because Problem.testcases() may be called multiple times (e.g. validating multiple modes, or with `bt all`), + # this cache makes sure that some warnings (like malformed test case names) only appear once. 
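Aside: to make the lookup order of get_test_case_yaml above concrete, the per-test-case <name>.yaml is consulted first, then every test_group.yaml walking up towards the data directory, and the closest file that defines the key wins. A standalone sketch (illustrative only, not part of this patch; PyYAML stands in for the project's own YAML helpers, and lookup_setting is hypothetical):

from pathlib import Path
from typing import Any, Optional

import yaml  # PyYAML, only as a stand-in for the real YAML parsing helpers

def lookup_setting(case_in_path: Path, data_root: Path, key: str) -> Optional[Any]:
    # the test case's own <name>.yaml takes precedence
    candidates = [case_in_path.with_suffix(".yaml")]
    # then every test_group.yaml up to (and including) the data/ root
    for parent in case_in_path.parents:
        candidates.append(parent / "test_group.yaml")
        if parent == data_root:
            break
    for candidate in candidates:
        if candidate.is_file():
            settings = yaml.safe_load(candidate.read_text()) or {}
            if key in settings:
                return settings[key]
    return None

# e.g. lookup_setting(problem_dir / "data/secret/group1/huge.in",
#                     problem_dir / "data", "output_validator_args")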
+ _warned_for_test_case = set[str]() + + def _warn_once(p, test_name, msg): + if test_name not in p._warned_for_test_case: + p._warned_for_test_case.add(test_name) + warn(msg) def testcases( p, @@ -664,12 +652,13 @@ def testcases( in_paths = list(set(in_paths)) elif mode is not None: + assert needans in_paths = [] for prefix in { validate.Mode.INPUT: ["secret", "sample"], validate.Mode.ANSWER: ["secret", "sample"], validate.Mode.INVALID: config.INVALID_CASE_DIRECTORIES, - validate.Mode.VALID_OUTPUT: ["valid_output"], + validate.Mode.VALID_OUTPUT: ["secret", "sample", "valid_output"], }[mode]: in_paths += glob(p.path, f"data/{prefix}/**/*.in") else: @@ -679,26 +668,35 @@ def testcases( testcases = [] for f in in_paths: - t = testcase.Testcase(p, f, print_warn=True) + t = testcase.Testcase(p, f) + if not config.COMPILED_FILE_NAME_REGEX.fullmatch(f.name): + p._warn_once(t.name, f"Test case name {t.name} is not valid. Skipping.") + continue + if f.with_suffix("").name == "test_group": + p._warn_once( + t.name, + "Test case must not be named 'test_group', this clashes with the group-level 'test_group.yaml'. Skipping.", + ) + continue if ( (p.interactive or p.multi_pass) and mode in [validate.Mode.INVALID, validate.Mode.VALID_OUTPUT] - and t.root in ["invalid_answer", "invalid_output", "valid_output"] + and t.root in ["invalid_output", "valid_output"] ): - msg = "" - if p.interactive: - msg += " interactive" - if p.multi_pass: - msg += " multi-pass" - warn(f"Found file {f} for {mode} validation in{msg} problem. Skipping.") + warn( + f"Found file {f} for {mode} validation in {p.settings.type_name()} problem. Skipping." + ) continue if needans and not t.ans_path.is_file(): if t.root != "invalid_input": - warn(f"Found input file {f} without a .ans file. Skipping.") + p._warn_once(t.name, f"Found input file {f} without a .ans file. Skipping.") + continue + if mode == validate.Mode.VALID_OUTPUT: + if t.out_path is None: + continue + if not t.out_path.is_file(): + warn(f"Found input file {f} without a .out file. Skipping.") continue - if t.out_path is not None and not t.out_path.is_file(): - warn(f"Found input file {f} without a .out file. Skipping.") - continue testcases.append(t) testcases.sort(key=lambda t: t.name) @@ -717,69 +715,127 @@ def testcases( p._testcases[key] = testcases return testcases - # Returns a list of: - # - (Path, Path): (.in, .ans) pair - # - (Path, Path): (.in.statement, .ans.statement) pair - # - Path : .interaction file - def statement_samples(p) -> list[Path | tuple[Path, Path]]: - statement_in_paths = list(glob(p.path, "data/sample/**/*.in.statement")) - interaction_paths = list(glob(p.path, "data/sample/**/*.interaction")) + def _samples( + p, in_extensions: list[str], ans_extensions: list[str], return_interaction_file: bool + ) -> list[Path | tuple[Path, Path]]: + """ + Find the samples of the problem - # Make sure that .in.statement files are not mixed with .interaction files. - for in_path in interaction_paths: - if in_path.with_suffix(".in.statement").is_file(): - warn( - f"Do not mix .in.statement files and .interaction files with the same basename in {p}." - ) + Arguments + --------- + in_extensions: possible extensions for an in file sorted by priority + ans_extensions: possible extensions for an ans file sorted by priority + return_interaction_file: If True allows to represent testcases by an .interaction file - # A .in may be shadowed by either .in.statement or .interaction, in which case the .in itself is not shown in the PDF. 
- in_paths = [] - for in_path in list(glob(p.path, "data/sample/**/*.in")): - if in_path.with_suffix(".in.statement").is_file(): - continue - if in_path.with_suffix(".interaction").is_file(): + Returns: + -------- + A list of testcases represented either by their .interaction file or an in and ans file + """ + + base_names: set[Path] = set() + for ext in [".in", ".in.statement", ".interaction"]: + files = list(p.path.glob(f"data/sample/**/*{ext}")) + base_names.update([drop_suffix(f, [ext]) for f in files if f.is_file()]) + testcases: list[Path | tuple[Path, Path]] = [] + has_raw = False + for name in base_names: + in_found = [ext for ext in in_extensions if name.with_suffix(ext).is_file()] + ans_found = [ext for ext in ans_extensions if name.with_suffix(ext).is_file()] + has_statement = ".in.statement" in in_found or ".ans.statement" in ans_found + + # check for inconsistencies + if ".in" in in_found and ".ans" not in ans_found: + warn(f"Found {name}.in but no {name}.ans. SKIPPING.") continue - in_paths.append(in_path) - # .interaction files cannot be mixed with .in/.ans pairs. - if len(interaction_paths) != 0 and len(in_paths) + len(statement_in_paths) != 0: - warn(f"Do not mix .interaction files with .in/.ans files in {p}.") + # resolve some inconsistencies + if ".in" not in in_found: + if ".ans" in ans_found: + warn(f"Found {name}.ans but no {name}.in. IGNORED.") + ans_found.remove(".ans") + if ".out" in ans_found: + warn(f"Found {name}.out but no {name}.in. IGNORED.") + ans_found.remove(".out") + if has_statement and ".out" in ans_found: + # we prefer .statement files + warn(f"Found {name}.out (but also .statement). IGNORED.") + ans_found.remove(".out") + + # .interaction files get highest priority + if return_interaction_file and name.with_suffix(".interaction").is_file(): + if not p.interactive and not p.multi_pass: + warn(f"Found {name}.interaction for non-interactive/non-multi-pass. IGNORED.") + else: + if has_statement: + warn( + f"Mixed .interaction and .statement file for {name}. (using .interaction)." + ) + if ".out" in ans_found: + warn(f"Mixed .interaction and .out file for {name}. (using .interaction).") + testcases.append(name.with_suffix(".interaction")) + continue - # Non-interactive and Non-multi-pass problems should not have .interaction files. - # On the other hand, interactive problems are allowed to have .{in,ans}.statement files, - # so that they can emulate a non-interactive problem with on-the-fly generated input. - if not p.interactive and not p.multi_pass: - if len(interaction_paths) != 0: + if not in_found or not ans_found: warn( - f"Non-interactive/Non-multi-pass problem {p.name} should not have data/sample/*.interaction files." + f"Could not find valid .in/.ans combination for test case {name}. SKIPPED." + + "\n\tNumbering for statement and download could be inconsistent!" ) - interaction_paths = [] - - testcases = list[Path | tuple[Path, Path]]() - for in_path in in_paths: - ans_path = in_path.with_suffix(".ans") - if not ans_path.is_file(): - warn(f"Found input file {in_path} without a .ans file. Skipping.") continue - testcases.append((in_path, ans_path)) - for in_path in statement_in_paths: - # first remove .statement, then replace .in with .ans.statement - ans_path = in_path.with_suffix("").with_suffix(".ans.statement") - if not ans_path.is_file(): - warn(f"Found input file {in_path} without a .ans.statement file. 
Skipping.") - continue - testcases.append((in_path, ans_path)) + if ( + not name.with_suffix(".interaction").is_file() + and ans_found[0] == ".ans" + and name.with_suffix(in_found[0]).stat().st_size > 0 + and name.with_suffix(ans_found[0]).stat().st_size > 0 + ): + has_raw = True - for interaction_path in interaction_paths: - testcases.append(interaction_path) + # fallback is pair of files + testcases.append((name.with_suffix(in_found[0]), name.with_suffix(ans_found[0]))) - testcases.sort() + if has_raw and not p.settings.ans_is_output: + warn( + "It is advised to overwrite .ans for samples if it does not represent a valid output." + + "\n\tUse .ans.statement or .out for this." + ) + testcases.sort() return testcases + # Returns a list of: + # - (Path, Path): with the first being one of [.in.statement, .in] and the second one of [.ans.statement, .out, .ans] + # - Path : .interaction file + def statement_samples(p) -> list[Path | tuple[Path, Path]]: + in_extensions = [ + ".in.statement", + ".in", + ] + ans_extensions = [ + ".ans.statement", + ".out", + ".ans", + ] + return p._samples(in_extensions, ans_extensions, True) + + # Returns a list of: + # - (Path, Path): with the first being one of [.in.download, .in.statement, .in] and the second one of [.ans.download, .ans.statement, .out, .ans] + def download_samples(p) -> list[tuple[Path, Path]]: + in_extensions = [ + ".in.download", + ".in.statement", + ".in", + ] + ans_extensions = [ + ".ans.download", + ".ans.statement", + ".out", + ".ans", + ] + testcases = p._samples(in_extensions, ans_extensions, False) + return [t for t in testcases if isinstance(t, tuple)] + # Returns the list of submissions passed as command-line arguments, or the list of accepted submissions by default. - def selected_or_accepted_submissions(problem) -> list["run.Submission"]: + def selected_or_accepted_submissions(problem) -> list[run.Submission]: submissions = problem.submissions() if not submissions: return [] @@ -788,7 +844,7 @@ def selected_or_accepted_submissions(problem) -> list["run.Submission"]: else: return [s for s in submissions if s.expected_verdicts == [verdicts.Verdict.ACCEPTED]] - def submissions(problem) -> list["run.Submission"] | Literal[False]: + def submissions(problem) -> list[run.Submission] | Literal[False]: if problem._submissions is not None: if problem._submissions is False: return False @@ -867,6 +923,30 @@ def build_program(p): assert isinstance(problem._submissions, list) return problem._submissions.copy() + @overload + def visualizer( + problem, cls: type[visualize.InputVisualizer] + ) -> Optional[visualize.InputVisualizer]: ... + @overload + def visualizer( + problem, cls: type[visualize.OutputVisualizer] + ) -> Optional[visualize.OutputVisualizer]: ... 
+    def visualizer(
+        problem, cls: type[visualize.AnyVisualizer]
+    ) -> Optional[visualize.AnyVisualizer]:
+        path = problem.path / cls.source_dir
+        if not path.is_dir():
+            return None
+        if cls not in problem._visualizer_cache:
+            visualizer = cls(problem, path)
+            bar = ProgressBar(f"Building {cls.visualizer_type} visualizer", items=[visualizer])
+            localbar = bar.start(visualizer)
+            visualizer.build(localbar)
+            localbar.done()
+            bar.finalize(print_done=False)
+            problem._visualizer_cache[cls] = visualizer if visualizer.ok else None
+        return problem._visualizer_cache[cls]
+
     def validators(
         problem,
         cls: type[validate.AnyValidator],
@@ -888,24 +968,23 @@ def validators(
         list(Validator) otherwise, maybe empty
         """
         validators = problem._validators(cls, check_constraints)
-        if not strict and cls == validate.AnswerValidator:
+        if not strict and cls == validate.AnswerValidator and problem.settings.ans_is_output:
             validators = validators + problem._validators(
                 validate.OutputValidator, check_constraints
             )

         # Check that the proper number of validators is present
-        # do this after handling the strict flag but dont warn every time
+        # do this after handling the strict flag but do not warn every time
         if print_warn:
             key = (cls, check_constraints)
             if key not in problem._validators_warn_cache:
+                constraints_msg = " for constraints checking" if check_constraints else ""
                 problem._validators_warn_cache.add(key)
-                match cls, len(validators):
-                    case validate.InputValidator, 0:
-                        warn("No input validators found.")
-                    case validate.AnswerValidator, 0:
-                        warn("No answer validators found")
-                    case validate.OutputValidator, l if l != 1:
-                        error(f"Found {len(validators)} output validators, expected exactly one.")
+                if cls == validate.InputValidator and not validators:
+                    warn(f"No input validators{constraints_msg} found.")
+                if cls == validate.AnswerValidator and not validators and not problem.interactive:
+                    # for interactive problems, the .ans file should be empty
+                    warn(f"No answer validators{constraints_msg} found.")

         build_ok = all(v.ok for v in validators)
@@ -920,19 +999,13 @@ def _validators(
         if key in problem._validators_cache:
             return problem._validators_cache[key]

-        assert hasattr(cls, "source_dirs")
-        # TODO #424: We should not support multiple output validators inside output_validator/.
-        paths = [p for source_dir in cls.source_dirs for p in glob(problem.path / source_dir, "*")]
-
-        # Handle default output validation
         if cls == validate.OutputValidator:
-            if problem.settings.is_legacy() and not problem.custom_output and paths:
-                error("Validation is default but custom output validator exists (ignoring it)")
-                paths = []
-            if not paths:
-                if problem.custom_output:
-                    fatal("Problem validation type requires output_validators/")
+            if problem.custom_output:
+                paths = [problem.path / validate.OutputValidator.source_dir]
+            else:
                 paths = [config.TOOLS_ROOT / "support" / "default_output_validator.cpp"]
+        else:
+            paths = list(glob(problem.path / cls.source_dir, "*"))

         # TODO: Instead of checking file contents, maybe specify this in generators.yaml?
def has_constraints_checking(f): @@ -978,21 +1051,20 @@ def build_program(p): problem._validators_cache[key] = validators return validators - # get all testcses and submissions and prepare the output validator + # get all testcases and submissions and prepare the output validator and visualizer def prepare_run(problem): testcases = problem.testcases() if not testcases: return False - if problem.interactive or problem.multi_pass: - validators = problem.validators(validate.OutputValidator) - if not validators: - return False - # Pre build the output validator to prevent nested ProgressBars. - if problem.validators(validate.OutputValidator) is False: + if not problem.validators(validate.OutputValidator): return False + # Pre build the output visualizer to prevent nested ProgressBars. + if not config.args.no_visualizer: + problem.visualizer(visualize.OutputVisualizer) + submissions = problem.submissions() if not submissions: return False @@ -1026,6 +1098,14 @@ def run_submissions(problem): testcases, submissions = ts_pair ok, verdict_table = Problem.run_some(testcases, submissions) + if ( + len(testcases) * len(submissions) > 1 + and not config.args.verbose + and not config.args.no_visualizer + and problem.visualizer(visualize.OutputVisualizer) + ): + log("use -v with --visualize to see the paths to the generated images") + if config.args.table: Problem._print_table(verdict_table.results, testcases) elif config.args.overview and not config.args.tree: @@ -1137,15 +1217,29 @@ def reset_testcase_hashes(self): # Returns None for new testcases or the Testcase object it equals. def matches_existing_testcase(self, t): - if t.root in ["invalid_input", "invalid_answer"]: - return None - h = hash_file_content(t.in_path) + hashes = {} + relevant_files = { + "invalid_input": ["in"], + "invalid_answer": [".in", ".ans"], + "invalid_output": [".in", ".ans", ".out"], + "valid_output": [".in", ".ans", ".out"], + } + relevant_files_default = [".in"] if self.settings.ans_is_output else [".in", ".ans"] + extensions = relevant_files.get(t.root, relevant_files_default) + + for ext in extensions: + if t.with_suffix(ext).is_file(): + hashes[ext] = hash_file_content(t.with_suffix(ext)) + + h = combine_hashes_dict(hashes) if h in self._testcase_hashes: return self._testcase_hashes[h] self._testcase_hashes[h] = t return None - def validate_data(problem, mode: validate.Mode, constraints: dict | bool | None = None) -> bool: + def validate_data( + problem, mode: validate.Mode, constraints: dict | Literal[True] | None = None + ) -> bool: """Validate aspects of the test data files. Arguments: @@ -1156,16 +1250,6 @@ def validate_data(problem, mode: validate.Mode, constraints: dict | bool | None True if all validation was successful. Successful validation includes, e.g., correctly rejecting invalid inputs. 
""" - if (problem.interactive or problem.multi_pass) and mode == validate.Mode.ANSWER: - if problem.validators(validate.AnswerValidator, strict=True, print_warn=False): - msg = "" - if problem.interactive: - msg += " interactive" - if problem.multi_pass: - msg += " multi-pass" - log(f"Not running answer_validators for{msg} problems.") - return True - action: str = "" if mode == validate.Mode.INVALID: action = "Invalidation" @@ -1189,7 +1273,13 @@ def validate_invalid_extra_data(p) -> bool: validators: list[tuple[type[validate.AnyValidator], str, str, str, list[str]]] = [ (validate.InputValidator, "invalid_input", ".in", ".in", []), (validate.AnswerValidator, "invalid_answer", ".ans", ".ans", [".in"]), - (validate.OutputValidator, "invalid_output", ".ans", ".out", [".in", ".ans"]), + ( + validate.OutputValidator, + "invalid_output", + ".ans" if p.settings.ans_is_output else ".out", + ".out", + [".in", ".ans"], + ), ] testcases: list[testcase.Testcase] = [] @@ -1198,7 +1288,9 @@ def validate_invalid_extra_data(p) -> bool: for cls, directory, read, write, copy in validators: if directory not in config.args.generic: continue - if (p.interactive or p.multi_pass) and cls != validate.InputValidator: + if p.interactive and cls != validate.InputValidator: + continue + if p.multi_pass and cls == validate.OutputValidator: continue if not p.validators(cls, strict=True, print_warn=False): continue @@ -1258,7 +1350,7 @@ def validate_valid_extra_data(p) -> bool: if not p.validators(validate.OutputValidator, strict=True, print_warn=False): return True - args = p.get_testdata_yaml( + args = p.get_test_case_yaml( p.path / "data" / "valid_output", "output_validator_args", PrintBar("Generic Output Validation"), @@ -1314,7 +1406,7 @@ def validate_valid_extra_data(p) -> bool: def _validate_data( problem, mode: validate.Mode, - constraints: dict | bool | None, + constraints: validate.ConstraintsDict | Literal[True] | None, action: str, testcases: Sequence[testcase.Testcase], extra: bool = False, @@ -1323,28 +1415,21 @@ def _validate_data( if not testcases: return True - if constraints is True: - constraints = {} - assert constraints is None or isinstance(constraints, dict) + constraints_dict = {} if constraints is True else constraints + check_constraints = constraints_dict is not None # Pre-build the relevant Validators so as to avoid clash with ProgressBar bar below # Also, pick the relevant testcases - check_constraints = constraints is not None match mode: case validate.Mode.INPUT: problem.validators(validate.InputValidator, check_constraints=check_constraints) case validate.Mode.ANSWER: - assert not problem.interactive - assert not problem.multi_pass problem.validators(validate.AnswerValidator, check_constraints=check_constraints) case validate.Mode.INVALID: problem.validators(validate.InputValidator) - if not problem.interactive and not problem.multi_pass: - problem.validators(validate.AnswerValidator) + problem.validators(validate.AnswerValidator) problem.validators(validate.OutputValidator) case validate.Mode.VALID_OUTPUT: - assert not problem.interactive - assert not problem.multi_pass problem.validators(validate.InputValidator) problem.validators(validate.AnswerValidator) problem.validators(validate.OutputValidator) @@ -1363,14 +1448,7 @@ def process_testcase(testcase: testcase.Testcase): localbar = bar.start(testcase.name) - if ( - mode == validate.Mode.INPUT - and not testcase.in_path.is_symlink() - and not testcase.root == "invalid_answer" - and not testcase.root == "invalid_output" - and not 
testcase.root == "valid_output" - and not extra - ): + if mode == validate.Mode.INPUT and not testcase.in_path.is_symlink() and not extra: t2 = problem.matches_existing_testcase(testcase) if t2 is not None: localbar.warn( @@ -1380,7 +1458,7 @@ def process_testcase(testcase: testcase.Testcase): return ok = testcase.validate_format( - mode, bar=localbar, constraints=constraints, warn_instead_of_error=extra + mode, bar=localbar, constraints=constraints_dict, warn_instead_of_error=extra ) success &= ok localbar.done(ok) @@ -1390,8 +1468,8 @@ def process_testcase(testcase: testcase.Testcase): bar.finalize(print_done=True) # Make sure all constraints are satisfied. - if constraints: - for loc, value in sorted(constraints.items()): + if constraints_dict: + for loc, value in sorted(constraints_dict.items()): loc = Path(loc).name name, has_low, has_high, vmin, vmax, low, high = value if not has_low: @@ -1428,10 +1506,12 @@ def run_all(select_verdict, select): return None, None, None cur_ok, verdict_table = Problem.run_some(testcases, cur_submissions) - ok &= cur_ok + if not cur_ok: + ok = False + return None, None, None def get_slowest(result): - slowest_pair = result.slowest_testcase() + slowest_pair = result.slowest_test_case() assert slowest_pair is not None return slowest_pair diff --git a/bin/program.py b/bin/program.py index 2dc633ed7..40278acd7 100644 --- a/bin/program.py +++ b/bin/program.py @@ -3,10 +3,11 @@ import stat import subprocess import threading -from typing import Final, TYPE_CHECKING - from colorama import Fore +from pathlib import Path +from typing import Final, Optional, TYPE_CHECKING +import config from util import * if TYPE_CHECKING: # Prevent circular import: https://stackoverflow.com/a/39757388 @@ -91,7 +92,7 @@ def sanitizer(): # Member variables are: # - short_path: the path relative to problem/subdir/, or None # - tmpdir: the build directory in tmpfs. This is only created when build() is called. -# - input_files: list of source files linked into tmpdir +# - input_files: list of source files linked/copied into tmpdir # - language: the detected language # - env: the environment variables used for compile/run command substitution # - hash: a hash of all of the program including all source files @@ -116,8 +117,9 @@ def __init__( subdir: str, deps: Optional[list[Path]] = None, *, - skip_double_build_warning=False, + skip_double_build_warning: bool = False, limits: dict[str, int] = {}, + substitute_constants: bool = False, ): if deps is not None: assert isinstance(self, Generator) @@ -139,24 +141,23 @@ def __init__( # Set self.name and self.tmpdir. # Ideally they are the same as the path inside the problem, but fallback to just the name. - try: - # Only resolve the parent of the program. This preserves programs that are symlinks to other directories. 
- relpath = (path.parent.resolve() / path.name).relative_to( - problem.path.resolve() / self.subdir - ) - self.short_path = relpath - self.name: str = str(relpath) - self.tmpdir = problem.tmpdir / self.subdir / relpath - except ValueError: - self.short_path = Path(path.name) - self.name = str(path.name) - self.tmpdir = problem.tmpdir / self.subdir / path.name + relpath = Path(path.name) + if path.absolute().parent != problem.path.absolute(): + try: + relpath = path.absolute().relative_to(problem.path.absolute() / subdir) + except ValueError: + pass + + self.short_path = relpath + self.name: str = str(relpath) + self.tmpdir = problem.tmpdir / self.subdir / self.name self.compile_command: Optional[list[str]] = None self.run_command: Optional[list[str]] = None self.hash: Optional[str] = None self.env: dict[str, int | str | Path] = {} self.limits: dict[str, int] = limits + self.substitute_constants: bool = substitute_constants self.ok = True self.built = False @@ -305,12 +306,11 @@ def _checks(self, bar: ProgressBar): for f in self.source_files: try: if f.read_text().find("bits/stdc++.h") != -1: - if "validators/" in str(f): - bar.error("Must not depend on bits/stdc++.h.", resume=True) - break - else: + if f.is_relative_to(self.problem.path / "submissions"): bar.log("Should not depend on bits/stdc++.h") - break + else: + bar.error("Must not depend on bits/stdc++.h.", resume=True) + break except UnicodeDecodeError: pass @@ -438,13 +438,27 @@ def build(self, bar: ProgressBar): self.input_files = [] hashes = [] for f in self.source_files: - ensure_symlink(self.tmpdir / f.name, f) - self.input_files.append(self.tmpdir / f.name) if not f.is_file(): self.ok = False bar.error(f"{str(f)} is not a file") return False - hashes.append(hash_file(f)) + tmpf = self.tmpdir / f.name + if ( + not self.substitute_constants + or not self.problem.settings.constants + or not has_substitute(f, config.CONSTANT_SUBSTITUTE_REGEX) + ): + ensure_symlink(tmpf, f) + else: + copy_and_substitute( + f, + tmpf, + self.problem.settings.constants, + pattern=config.CONSTANT_SUBSTITUTE_REGEX, + bar=bar, + ) + self.input_files.append(tmpf) + hashes.append(hash_file(tmpf)) self.hash = combine_hashes(hashes) if not self._get_language(bar): @@ -499,7 +513,7 @@ def build(self, bar: ProgressBar): return True - def _exec_command(self, *args, **kwargs): + def _exec_command(self, *args, **kwargs) -> ExecResult: if "timeout" not in kwargs and "timeout" in self.limits: kwargs["timeout"] = self.limits["timeout"] if "memory" not in kwargs and "memory" in self.limits: @@ -520,6 +534,7 @@ def __init__(self, problem: "Problem", path: Path, **kwargs): path, "generators", limits={"timeout": problem.limits.generator_time}, + substitute_constants=True, **kwargs, ) @@ -550,16 +565,12 @@ def run(self, bar, cwd, name, args=[]): cwd=cwd, ) - result.retry = False - if result.status == ExecStatus.TIMEOUT: # Timeout -> stop retrying and fail. bar.log(f"TIMEOUT after {timeout}s", color=Fore.RED) return result if not result.status: - # Other error -> try again. - result.retry = True return result if stdout_path.read_text(): @@ -574,23 +585,3 @@ def run(self, bar, cwd, name, args=[]): return result return result - - -class Visualizer(Program): - def __init__(self, problem: "Problem", path: Path, **kwargs): - super().__init__( - problem, - path, - "visualizers", - limits={"timeout": problem.limits.visualizer_time}, - **kwargs, - ) - - # Run the visualizer. - # Stdin and stdout are not used. 
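# --- Illustrative sketch (not part of this patch) ---------------------------
# The build() change above copies a source file into tmpdir (instead of
# symlinking it) when the program opts into constant substitution and the file
# contains {{name}} placeholders, which are then filled from
# problem.settings.constants. A minimal standalone version of that idea; the
# helper name and regex below are illustrative, not the tool's own API:
import re
from pathlib import Path

CONSTANT_PLACEHOLDER_RE = re.compile(r"\{\{([a-zA-Z_][a-zA-Z0-9_]*)\}\}")

def substitute_constants_sketch(src: Path, dst: Path, constants: dict[str, object]) -> None:
    def repl(match: re.Match[str]) -> str:
        # Unknown names are left untouched; the real implementation reports them.
        name = match.group(1)
        return str(constants[name]) if name in constants else match.group(0)
    dst.write_text(CONSTANT_PLACEHOLDER_RE.sub(repl, src.read_text()))
# -----------------------------------------------------------------------------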
- def run(self, cwd, args=[]): - assert self.run_command is not None - return self._exec_command( - self.run_command + args, - cwd=cwd, - ) diff --git a/bin/run.py b/bin/run.py index 87d29b88d..c03472d32 100644 --- a/bin/run.py +++ b/bin/run.py @@ -4,8 +4,9 @@ import sys from colorama import Fore, Style +from contextlib import nullcontext from pathlib import Path -from typing import cast +from typing import Optional import config import interactive @@ -13,8 +14,10 @@ import problem import program import validate +import visualize from testcase import Testcase from util import ( + BAR_TYPE, crop_output, ensure_symlink, error, @@ -23,6 +26,7 @@ is_bsd, is_windows, ProgressBar, + shorten_path, warn, ) from verdicts import from_string, from_string_domjudge, RunUntil, Verdict, Verdicts @@ -40,7 +44,7 @@ def __init__(self, problem: "problem.Problem", submission: "Submission", testcas self.problem.tmpdir / "runs" / self.submission.short_path - / cast(Path, self.testcase.short_path).with_suffix("") + / self.testcase.short_path.with_suffix("") ) self.in_path: Path = self.tmpdir / "testcase.in" @@ -78,93 +82,91 @@ def run(self, bar, *, interaction=None, submission_args=None): else: if interaction: assert not interaction.is_relative_to(self.tmpdir) - interaction = interaction.open("a") - nextpass = self.feedbackdir / "nextpass.in" if self.problem.multi_pass else None - pass_id = 0 - max_duration = 0 - tle_result = None - while True: - pass_id += 1 - result = self.submission.run(self.in_path, self.out_path) - max_duration = max(max_duration, result.duration) - - # write an interaction file for samples - if interaction: - data = self.in_path.read_text() - if len(data) > 0 and data[-1] == "\n": - data = data[:-1] - data = data.replace("\n", "\n<") - print("<", data, sep="", file=interaction) - - data = self.out_path.read_text() - if len(data) > 0 and data[-1] == "\n": - data = data[:-1] - data = data.replace("\n", "\n>") - print(">", data, sep="", file=interaction) - - if result.duration > self.problem.limits.time_limit: - result.verdict = Verdict.TIME_LIMIT_EXCEEDED - if tle_result is None: - tle_result = result - tle_result.pass_id = pass_id if self.problem.multi_pass else None - else: - tle_result.timeout_expired |= result.timeout_expired - if not self._continue_with_tle(result.verdict, result.timeout_expired): + with interaction.open("a") if interaction else nullcontext(None) as interaction_file: + nextpass = self.feedbackdir / "nextpass.in" if self.problem.multi_pass else None + pass_id = 0 + max_duration = 0 + tle_result = None + while True: + pass_id += 1 + result = self.submission.run(self.in_path, self.out_path) + max_duration = max(max_duration, result.duration) + + # write an interaction file for samples + if interaction: + data = self.in_path.read_text() + if len(data) > 0 and data[-1] == "\n": + data = data[:-1] + data = data.replace("\n", "\n<") + print("<", data, sep="", file=interaction_file) + + data = self.out_path.read_text() + if len(data) > 0 and data[-1] == "\n": + data = data[:-1] + data = data.replace("\n", "\n>") + print(">", data, sep="", file=interaction_file) + + if result.duration > self.problem.limits.time_limit: + result.verdict = Verdict.TIME_LIMIT_EXCEEDED + if tle_result is None: + tle_result = result + tle_result.pass_id = pass_id if self.problem.multi_pass else None + else: + tle_result.timeout_expired |= result.timeout_expired + if not self._continue_with_tle(result.verdict, result.timeout_expired): + break + elif result.status == ExecStatus.ERROR: + result.verdict = 
Verdict.RUNTIME_ERROR + msg = f"Exited with code {result.returncode}" + if config.args.error and result.err: + result.err = f"{msg}:\n{result.err}" + else: + result.err = msg break - elif result.status == ExecStatus.ERROR: - result.verdict = Verdict.RUNTIME_ERROR - if config.args.error: - result.err = ( - "Exited with code " + str(result.returncode) + ":\n" + result.err + + result = self._validate_output(bar) + if result is None: + bar.error( + f"No output validator found for testcase {self.testcase.name}", + resume=True, + ) + result = ExecResult( + None, + ExecStatus.REJECTED, + 0, + False, + None, + None, + Verdict.VALIDATOR_CRASH, ) + elif result.status: + result.verdict = Verdict.ACCEPTED + validate.sanity_check( + self.problem, self.out_path, bar, strict_whitespace=False + ) + elif result.status == ExecStatus.REJECTED: + result.verdict = Verdict.WRONG_ANSWER + if nextpass and nextpass.is_file(): + bar.error("got WRONG_ANSWER but found nextpass.in", resume=True) + result.verdict = Verdict.VALIDATOR_CRASH else: - result.err = "Exited with code " + str(result.returncode) - break - - result = self._validate_output(bar) - if result is None: - bar.error( - f"No output validator found for testcase {self.testcase.name}", - resume=True, - ) - result = ExecResult( - None, - ExecStatus.REJECTED, - 0, - False, - None, - None, - Verdict.VALIDATOR_CRASH, - ) - elif result.status: - result.verdict = Verdict.ACCEPTED - validate.sanity_check(self.problem, self.out_path, bar, strict_whitespace=False) - elif result.status == ExecStatus.REJECTED: - result.verdict = Verdict.WRONG_ANSWER - if nextpass and nextpass.is_file(): - bar.error("got WRONG_ANSWER but found nextpass.in", resume=True) + config.n_error += 1 result.verdict = Verdict.VALIDATOR_CRASH - else: - config.n_error += 1 - result.verdict = Verdict.VALIDATOR_CRASH - - if result.verdict != Verdict.ACCEPTED: - break - if not self._prepare_nextpass(nextpass): - break + if result.verdict != Verdict.ACCEPTED: + break - assert self.problem.limits.validation_passes is not None - if pass_id >= self.problem.limits.validation_passes: - bar.error("exceeded limit of validation_passes", resume=True) - result.verdict = Verdict.VALIDATOR_CRASH - break + if not self._prepare_nextpass(nextpass): + break - if interaction: - print("---", file=interaction) + assert self.problem.limits.validation_passes is not None + if pass_id >= self.problem.limits.validation_passes: + bar.error("exceeded limit of validation_passes", resume=True) + result.verdict = Verdict.VALIDATOR_CRASH + break - if interaction: - interaction.close() + if interaction: + print("---", file=interaction_file) if self.problem.multi_pass: result.pass_id = pass_id @@ -174,7 +176,9 @@ def run(self, bar, *, interaction=None, submission_args=None): result.duration = max_duration - # Delete .out files larger than 1MB. + self._visualize_output(bar) + + # Delete .out files larger than 1GB. 
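# --- Illustrative sketch (not part of this patch) ---------------------------
# The rewritten Run.run() above wraps the optional interaction file in
# contextlib.nullcontext, so a single `with` block covers both the "file given"
# and "no file" cases without manual open()/close() bookkeeping.
# The pattern in isolation (function and variable names are illustrative):
from contextlib import nullcontext
from pathlib import Path
from typing import Optional

def append_line(line: str, log_path: Optional[Path]) -> None:
    # If log_path is None, the context manager yields None and nothing is written.
    with log_path.open("a") if log_path else nullcontext(None) as f:
        if f is not None:
            print(line, file=f)
# -----------------------------------------------------------------------------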
if ( not config.args.error and self.out_path.is_file() @@ -215,14 +219,30 @@ def _prepare_nextpass(self, nextpass): shutil.move(nextpass, self.in_path) return True - def _validate_output(self, bar): + def _validate_output(self, bar: BAR_TYPE) -> Optional[ExecResult]: output_validators = self.problem.validators(validate.OutputValidator) - if len(output_validators) != 1: + if not output_validators: return None - validator = output_validators[0] + output_validator = output_validators[0] + assert isinstance(output_validator, validate.OutputValidator) + return output_validator.run( + self.testcase, + self, + args=self.testcase.test_case_yaml_args(output_validator, bar), + ) - return validator.run( - self.testcase, self, args=self.testcase.testdata_yaml_validator_args(validator, bar) + def _visualize_output(self, bar: BAR_TYPE) -> Optional[ExecResult]: + if config.args.no_visualizer: + return None + output_visualizer = self.problem.visualizer(visualize.OutputVisualizer) + if output_visualizer is None: + return None + return output_visualizer.run( + self.in_path, + self.testcase.ans_path.resolve(), + self.out_path if not self.problem.interactive else None, + self.feedbackdir, + args=self.testcase.test_case_yaml_args(output_visualizer, bar), ) @@ -329,14 +349,15 @@ def run(self, in_path, out_path, crop=True, args=[], cwd=None, generator_timeout # Just for safety reasons, change the cwd. if cwd is None: cwd = self.tmpdir - with in_path.open("rb") as inf: - out_file = out_path.open("wb") if out_path else None - + with ( + in_path.open("rb") as in_file, + out_path.open("wb") if out_path else nullcontext(None) as out_file, + ): # Print stderr to terminal is stdout is None, otherwise return its value. result = self._exec_command( self.run_command + args, crop=crop, - stdin=inf, + stdin=in_file, stdout=out_file, stderr=None if out_file is None else True, cwd=cwd, @@ -346,9 +367,7 @@ def run(self, in_path, out_path, crop=True, args=[], cwd=None, generator_timeout else self.limits["timeout"] ), ) - if out_file: - out_file.close() - return result + return result # Run this submission on all testcases that are given. # Returns (OK verdict, printed newline) @@ -418,14 +437,16 @@ def process_run(run: Run): if result.out: data = crop_output(result.out) - judgemessage = run.feedbackdir / "judgemessage.txt" - judgeerror = run.feedbackdir / "judgeerror.txt" # Add data from feedbackdir. 
for f in run.feedbackdir.iterdir(): - if f in [judgemessage, judgeerror]: - continue if f.name.startswith("."): continue # skip "hidden" files + if f.name in ["judgemessage.txt", "judgeerror.txt"]: + continue + if f.name.startswith("judgeimage.") or f.name.startswith("teamimage."): + data += f"{f.name}: {shorten_path(self.problem, f.parent) / f.name}\n" + ensure_symlink(run.problem.path / f.name, f, output=True, relative=False) + continue if not f.is_file(): localbar.warn(f"Validator wrote to {f} but it's not a file.") continue @@ -479,7 +500,7 @@ def process_run(run: Run): else: color = Fore.GREEN if self.verdict in self.expected_verdicts else Fore.RED - (salient_testcase, salient_duration) = verdicts.salient_testcase() + (salient_testcase, salient_duration) = verdicts.salient_test_case() salient_print_verdict = self.verdict salient_duration_style = Style.BRIGHT if salient_duration >= self.limits["timeout"] else "" @@ -487,7 +508,7 @@ def process_run(run: Run): message = f"{color}{salient_print_verdict.short():>3}{salient_duration_style}{salient_duration:6.3f}s{Style.RESET_ALL} {Style.DIM}@ {salient_testcase:{max_testcase_len}}{Style.RESET_ALL}" if verdicts.run_until in [RunUntil.DURATION, RunUntil.ALL]: - slowest_pair = verdicts.slowest_testcase() + slowest_pair = verdicts.slowest_test_case() assert slowest_pair is not None (slowest_testcase, slowest_duration) = slowest_pair slowest_verdict = verdicts[slowest_testcase] @@ -522,10 +543,8 @@ def test(self): testcases = self.problem.testcases(needans=False) - if self.problem.interactive: - output_validators = self.problem.validators(validate.OutputValidator) - if output_validators is False: - return + if not self.problem.validators(validate.OutputValidator): + return for testcase in testcases: header = ProgressBar.action("Running " + str(self.name), testcase.name) @@ -589,10 +608,8 @@ def test(self): # Run the submission using stdin as input. 
def test_interactive(self): - if self.problem.interactive: - output_validators = self.problem.validators(validate.OutputValidator) - if output_validators is False: - return + if not self.problem.validators(validate.OutputValidator): + return bar = ProgressBar("Running " + str(self.name), max_len=1, count=1) bar.start() diff --git a/bin/skel.py b/bin/skel.py index 316302b68..ced7cac1f 100644 --- a/bin/skel.py +++ b/bin/skel.py @@ -1,114 +1,46 @@ -import shutil +import os import datetime import re +import shutil +from pathlib import Path # Local imports import config -from export import force_single_language +import contest +import latex from problem import Problem from util import * -import contest - -try: - import questionary - from questionary import Validator, ValidationError - - has_questionary = True - - class EmptyValidator(Validator): - def validate(self, document): - if len(document.text) == 0: - raise ValidationError(message="Please enter a value") - -except Exception: - has_questionary = False - - -def _ask_variable(name, default=None, allow_empty=False): - if config.args.defaults: - if not default and not allow_empty: - fatal(f"{name} has no default") - return default - while True: - val = input(f"{name}: ") - val = default if val == "" else val - if val != "" or allow_empty: - return val - - -def _ask_variable_string(name, default=None, allow_empty=False): - if has_questionary: - try: - validate = None if allow_empty else EmptyValidator - return questionary.text( - name + ":", default=default or "", validate=validate - ).unsafe_ask() - except KeyboardInterrupt: - fatal("Running interrupted") - else: - text = f" ({default})" if default else "" - return _ask_variable(name + text, default if default else "", allow_empty) - - -def _ask_variable_bool(name, default=True): - if has_questionary: - try: - return questionary.confirm(name + "?", default=default, auto_enter=False).unsafe_ask() - except KeyboardInterrupt: - fatal("Running interrupted") - else: - text = " (Y/n)" if default else " (y/N)" - return _ask_variable(name + text, "Y" if default else "N").lower()[0] == "y" - - -def _ask_variable_choice(name, choices, default=None): - if has_questionary: - try: - plain = questionary.Style([("selected", "noreverse")]) - return questionary.select( - name + ":", choices=choices, default=default, style=plain - ).unsafe_ask() - except KeyboardInterrupt: - fatal("Running interrupted") - else: - default = default or choices[0] - text = f" ({default})" if default else "" - while True: - got = _ask_variable(name + text, default if default else "") - if got in choices: - return got - else: - warn(f"unknown option: {got}") +from validate import OutputValidator # Returns the alphanumeric version of a string: # This reduces it to a string that follows the regex: # [a-zA-Z0-9][a-zA-Z0-9_.-]*[a-zA-Z0-9] -def _alpha_num(string): +def _alpha_num(string: str) -> str: s = re.sub(r"[^a-zA-Z0-9_.-]", "", string.lower().replace(" ", "").replace("-", "")) - while s.startswith("_.-"): + while len(s) and s[0] in "_.-": s = s[1:] - while s.endswith("_.-"): + while len(s) and s[-1] in "_.-": s = s[:-1] return s -def new_contest(): +def new_contest() -> None: if config.args.contest: fatal("--contest does not work for new_contest.") if config.args.problem: fatal("--problem does not work for new_contest.") # Ask for all required infos. 
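# --- Illustrative sketch (not part of this patch) ---------------------------
# The _alpha_num change above fixes a subtle bug: the old loops only stripped
# the literal three-character prefix/suffix "_.-", while the new loops strip
# any leading/trailing character from the set {'_', '.', '-'}. For example,
# "_problem_" was previously returned unchanged but now becomes "problem".
# The new edge-stripping behaves the same as:
def strip_edge_punctuation(s: str) -> str:
    return s.strip("_.-")
# -----------------------------------------------------------------------------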
- title = _ask_variable_string("name", config.args.contestname) - subtitle = _ask_variable_string("subtitle", "", True).replace("_", "-") - dirname = _ask_variable_string("dirname", _alpha_num(title)) - author = _ask_variable_string("author", f"The {title} Jury").replace("_", "-") - testsession = _ask_variable_bool("testsession", False) - year = _ask_variable_string("year", str(datetime.datetime.now().year)) - source_url = _ask_variable_string("source url", "", True) - license = _ask_variable_choice("license", config.KNOWN_LICENSES) - rights_owner = _ask_variable_string( + title = ask_variable_string("name", config.args.contestname) + subtitle = ask_variable_string("subtitle", "", True).replace("_", "-") + dirname = ask_variable_string("dirname", _alpha_num(title)) + author = ask_variable_string("author", f"The {title} Jury").replace("_", "-") + test_session = ask_variable_bool("test session", False) + year = ask_variable_string("year", str(datetime.datetime.now().year)) + source_url = ask_variable_string("source url", "", True) + license = ask_variable_choice("license", config.KNOWN_LICENSES) + rights_owner = ask_variable_string( "rights owner (if left empty, defaults to problem author)", "", allow_empty=True ) rights_owner = f"rights_owner: {rights_owner}\n" if rights_owner else "" @@ -121,7 +53,7 @@ def new_contest(): ) -def get_skel_dir(target_dir): +def get_skel_dir(target_dir: Path) -> tuple[Path, bool]: skeldir = config.TOOLS_ROOT / "skel/problem" preserve_symlinks = False if (target_dir / "skel/problem").is_dir(): @@ -136,44 +68,44 @@ def get_skel_dir(target_dir): return (skeldir, preserve_symlinks) -def new_problem(): +def new_problem() -> None: target_dir = Path(".") if config.args.contest: os.chdir(Path(config.args.contest)) if config.args.problem: fatal("--problem does not work for new_problem.") - statement_languages = config.args.languages if config.args.languages else ["en"] + statement_languages = config.args.lang if config.args.lang else ["en"] main_language = "en" if "en" in statement_languages else statement_languages[0] problemname = { lang: ( config.args.problemname if config.args.problemname - else _ask_variable_string(f"problem name ({lang})") + else ask_variable_string(f"problem name ({lang})") ) for lang in statement_languages } dirname = ( _alpha_num(config.args.problemname) if config.args.problemname - else _ask_variable_string("dirname", _alpha_num(problemname[main_language])) + else ask_variable_string("dirname", _alpha_num(problemname[main_language])) ) - author = config.args.author if config.args.author else _ask_variable_string("author") + author = config.args.author if config.args.author else ask_variable_string("author") - output_validator_args = "#output_validator_args:" + output_validator_args = f"#{OutputValidator.args_key}:" custom_output = False if config.args.type: problem_type = config.args.type else: - problem_type = _ask_variable_choice( + problem_type = ask_variable_choice( "type", ["pass-fail", "float", "custom", "interactive", "multi-pass", "interactive multi-pass"], ) # The validation type `float` is not official, it only helps setting the `output_validator_args`. if problem_type == "float": problem_type = "pass-fail" - output_validator_args = "output_validator_args: float_tolerance 1e-6" + output_validator_args = f"{OutputValidator.args_key}: float_tolerance 1e-6" log("Using default float tolerance of 1e-6") # Since version 2023-07-draft of the spec, the `custom` validation type is no longer explicit. 
# The mere existence of the output_validator(s)/ folder signals non-default output validation. @@ -189,22 +121,25 @@ def new_problem(): "dirname": dirname, "author": author, "type": problem_type, - "output_validator_args": output_validator_args, - "testdata_yaml_comment": "#" if output_validator_args[0] == "#" else "", + OutputValidator.args_key: output_validator_args, + "test_group_yaml_comment": "#" if output_validator_args[0] == "#" else "", } - source_name = _ask_variable_string( + source_name = ask_variable_string( "source", variables.get("source", variables.get("name", "")), True ) - source_url = _ask_variable_string("source url", variables.get("source_url", ""), True) - variables["source"] = ( - f"source:\n name: {source_name}\n{f' url: {source_url}' if source_url else ' #url:'}" - ) + if source_name: + source_url = ask_variable_string("source url", variables.get("source_url", ""), True) + variables["source"] = ( + f"source:\n name: {source_name}\n{f' url: {source_url}' if source_url else ' #url:'}\n" + ) + else: + variables["source"] = "" - variables["license"] = _ask_variable_choice( + variables["license"] = ask_variable_choice( "license", config.KNOWN_LICENSES, variables.get("license", None) ) - variables["rights_owner"] = _ask_variable_string( + variables["rights_owner"] = ask_variable_string( f"rights owner{'' if variables.get('rights_owner', '') else ' (if left empty, defaults to problem author)'}", variables.get("rights_owner", ""), allow_empty=True, @@ -218,9 +153,9 @@ def new_problem(): skeldir, preserve_symlinks = get_skel_dir(target_dir) log(f"Copying {skeldir} to {target_dir / dirname}.") - if "2023-07-draft" not in (skeldir / "problem.yaml").read_text(): + if config.SPEC_VERSION not in (skeldir / "problem.yaml").read_text(): fatal( - "new_problem only supports `skel` directories where `problem.yaml` has `version: 2023-07-draft." + f"new_problem only supports `skel` directories where `problem.yaml` has `version: {config.SPEC_VERSION}." ) problems_yaml = target_dir / "problems.yaml" @@ -230,7 +165,7 @@ def new_problem(): data = read_yaml(problems_yaml) or [] prev_label = data[-1]["label"] if data else None next_label = ( - ("X" if contest.contest_yaml().get("testsession") else "A") + ("X" if contest.contest_yaml().get("test_session") else "A") if prev_label is None else inc_label(prev_label) ) @@ -239,7 +174,7 @@ def new_problem(): { "id": dirname, "label": next_label, - "name": problemname[main_language], + "name": problemname, "rgb": "#000000", "time_limit": 1.0, } @@ -248,24 +183,29 @@ def new_problem(): else: error("ruamel.yaml library not found. Please update problems.yaml manually.") + skip = [] + if not custom_output: + skip.append(skeldir / OutputValidator.source_dir) + copytree_and_substitute( skeldir, target_dir / dirname, variables, exist_ok=True, preserve_symlinks=preserve_symlinks, - skip=[skeldir / "output_validators"] if not custom_output else None, + skip=skip, ) # Warn about missing problem statement skeletons for non-en languages for lang in statement_languages: - filename = f"problem.{lang}.tex" - statement_path = target_dir / dirname / "problem_statement" / filename + statement_path = target_dir / dirname / latex.PdfType.PROBLEM.path(lang) if not statement_path.is_file(): - warn(f"No skeleton for {filename} found. Create it manually or update skel/problem.") + warn( + f"No skeleton for {statement_path.name} found. Create it manually or update skel/problem." 
+ ) -def rename_problem(problem): +def rename_problem(problem: Problem) -> None: if not has_ryaml: fatal("ruamel.yaml library not found.") @@ -273,14 +213,14 @@ def rename_problem(problem): lang: ( config.args.problemname if config.args.problemname - else _ask_variable_string(f"New problem name ({lang})", problem.settings.name[lang]) + else ask_variable_string(f"New problem name ({lang})", problem.settings.name[lang]) ) for lang in problem.statement_languages } dirname = ( _alpha_num(config.args.problemname) if config.args.problemname - else _ask_variable_string("dirname", _alpha_num(newname[problem.statement_languages[0]])) + else ask_variable_string("dirname", _alpha_num(newname[problem.statement_languages[0]])) ) shutil.move(problem.name, dirname) @@ -290,11 +230,6 @@ def rename_problem(problem): data["name"] = newname write_yaml(data, problem_yaml) - # DOMjudge does not yet support multilingual problems.yaml files. - statement_language = force_single_language([problem]) - if isinstance(newname, dict): - newname = newname[statement_language] - problems_yaml = Path("problems.yaml") if problems_yaml.is_file(): data = read_yaml(problems_yaml) or [] @@ -305,7 +240,7 @@ def rename_problem(problem): write_yaml(data, problems_yaml) -def copy_skel_dir(problems): +def copy_skel_dir(problems: list[Problem]) -> None: assert len(problems) == 1 problem = problems[0] @@ -337,10 +272,27 @@ def copy_skel_dir(problems): # NOTE: This is one of few places that prints to stdout instead of stderr. -def create_gitlab_jobs(contest: str, problems: list[Problem]): - git_root_path = Path(os.popen("git rev-parse --show-toplevel").read().strip()).resolve() +def create_gitlab_jobs(contest: str, problems: list[Problem]) -> None: + if shutil.which("git") is None: + error("git command not found!") + return + + def git(*args): + res = exec_command( + ["git", *args], + crop=False, + preexec_fn=False, + timeout=None, + ) + return res.out if res else "" + + if not git("rev-parse", "--is-inside-work-tree").startswith("true"): + error("not inside git") + return + + git_root_path = Path(git("rev-parse", "--show-toplevel").strip()).resolve() - def problem_source_dir(problem: Problem): + def problem_source_dir(problem: Problem) -> Path: return problem.path.resolve().relative_to(git_root_path) if config.args.latest_bt: @@ -352,8 +304,9 @@ def problem_source_dir(problem: Problem): contest_yml = (config.TOOLS_ROOT / "skel/gitlab_ci/contest.yaml").read_text() contest_path = Path(".").resolve().relative_to(git_root_path) changes = "".join( - " - " + str(problem_source_dir(problem)) + "/problem_statement/**/*\n" + f" - {problem_source_dir(problem)}/{pdf_type.path().parent}/**/*\n" for problem in problems + for pdf_type in latex.PdfType ) print( substitute( @@ -372,7 +325,7 @@ def problem_source_dir(problem: Problem): ) -def create_forgejo_actions(contest: str, problems: list[Problem]): +def create_forgejo_actions(contest: str, problems: list[Problem]) -> None: if Path(".git").is_dir(): contest_path = Path(".") forgejo = Path(".forgejo") @@ -418,7 +371,7 @@ def create_forgejo_actions(contest: str, problems: list[Problem]): # Differences with forgejo: # - flat structure, with all workflows directly in `.github/workflows`. 
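# --- Illustrative sketch (not part of this patch) ---------------------------
# create_gitlab_jobs above now checks that `git` exists and that the working
# directory is inside a repository before asking for the repository root,
# instead of piping unchecked `os.popen` output around. The same guard with
# plain subprocess (standalone; not the tool's exec_command helper):
import shutil
import subprocess

def git_output(*args: str) -> str:
    if shutil.which("git") is None:
        return ""
    res = subprocess.run(["git", *args], capture_output=True, text=True)
    return res.stdout if res.returncode == 0 else ""

# Usage: root = git_output("rev-parse", "--show-toplevel").strip()
# -----------------------------------------------------------------------------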
-def create_github_actions(contest: str, problems: list[Problem]): +def create_github_actions(contest: str, problems: list[Problem]) -> None: if config.args.latest_bt: fatal("Caching the latest BAPCtools is not supported for github actions.") diff --git a/bin/slack.py b/bin/slack.py index cda86e11e..0f16426da 100644 --- a/bin/slack.py +++ b/bin/slack.py @@ -1,3 +1,4 @@ +import config from util import * # Perform slack actions for the selected problems (all, or the selected/current one). diff --git a/bin/stats.py b/bin/stats.py index 5e797ad4d..68786c5db 100644 --- a/bin/stats.py +++ b/bin/stats.py @@ -11,7 +11,9 @@ import config import generate +import latex import program +import validate from util import error, exec_command, glob, warn Selector = str | Callable | list[str] | list[Callable] @@ -48,11 +50,11 @@ def problem_stats(problems): # Roughly in order of importance (" time", lambda p: p.limits.time_limit, 0), ("yaml", "problem.yaml"), - ("tex", "problem_statement/problem*.tex", 1), - ("sol", "problem_statement/solution*.tex", 1), - (" val: I", ["input_validators/*", "input_format_validators/*"]), - ("A", ["answer_validators/*"]), - ("O", ["output_validators/*"]), + ("tex", str(latex.PdfType.PROBLEM.path("*")), 1), + ("sol", str(latex.PdfType.SOLUTION.path("*")), 1), + (" val: I", [f"{validate.InputValidator.source_dir}/*"]), + ("A", [f"{validate.AnswerValidator.source_dir}/*"]), + ("O", [f"{validate.OutputValidator.source_dir}/*"]), ( " sample", [lambda s: {x.stem for x in s if x.parts[2] == "sample"}], @@ -66,12 +68,12 @@ def problem_stats(problems): 100, ), ( - "bad", + "inv", [lambda s: {x.stem for x in s if x.parts[2] in config.INVALID_CASE_DIRECTORIES}], 0, ), ( - "good", + "v_o", [lambda s: {x.stem for x in s if x.parts[2] in ["valid_output"]}], 0, ), @@ -172,7 +174,7 @@ def count(path): def value(x): if x[0] == " time" or x[0] == "subs": return x[1](problem) - if x[0] == "A" and (problem.interactive or problem.multi_pass): + if x[0] == "A" and problem.interactive: return None # Do not show an entry for the answer validator if it is not required if x[0] == "O" and not problem.custom_output: return None # Do not show an entry for the output validator if it is not required @@ -375,7 +377,7 @@ def get_submissions_row(display_name, names): # git stats if shutil.which("git") is None: - error("git not found!") + error("git command not found!") return def git(*args): diff --git a/bin/testcase.py b/bin/testcase.py index c47a1e979..2a2de206b 100644 --- a/bin/testcase.py +++ b/bin/testcase.py @@ -1,20 +1,28 @@ """Test case""" -from typing import cast, Literal +from collections.abc import Sequence +from colorama import Fore, Style +from pathlib import Path +from typing import Optional, TYPE_CHECKING from util import ( + BAR_TYPE, ExecStatus, combine_hashes_dict, fatal, print_name, + ProgressBar, shorten_path, - warn, ) -from colorama import Fore, Style import config import validate +if TYPE_CHECKING: # Prevent circular import: https://stackoverflow.com/a/39757388 + import visualize + import problem + +# TODO #102: Consistently separate the compound noun "test case", e.g. "TestCase" or "test_case" class Testcase: """ A single test case. It consists of files with matching base names, typically @@ -52,12 +60,18 @@ class Testcase: ans_path: Path Like `hamiltonicity/data/secret/cubic/petersen.ans`. - testdata_yaml: dict - The YAML-parsed test data flags that apply to this test case. + out_path: Path + Like `hamiltonicity/data/secret/cubic/petersen.out`. 
""" - def __init__(self, base_problem, path, *, short_path=None, print_warn=False): + def __init__( + self, + base_problem: "problem.Problem", + path: Path, + *, + short_path: Optional[Path] = None, + ): """ Arguments --------- @@ -69,54 +83,45 @@ def __init__(self, base_problem, path, *, short_path=None, print_warn=False): is the (absolute) path to the input file, and `short_path` is used as the equivalent of the testcase's path relative to `problem.path / 'data'`. """ - assert path.suffix == ".in" or path.suffixes == [".in", ".statement"] + assert path.suffix == ".in" self.problem = base_problem - # TODO add self.out_path if short_path is None: try: - self.short_path = path.relative_to(self.problem.path / "data") + self.short_path: Path = path.relative_to(self.problem.path / "data") except ValueError: fatal(f"Testcase {path} is not inside {self.problem.path / 'data'}.") else: self.short_path = short_path - self.root = self.short_path.parts[0] + self.root: str = self.short_path.parts[0] - self.in_path = path - self.ans_path = ( - self.in_path.with_suffix(".ans") - if path.suffix == ".in" - else self.in_path.with_name(self.in_path.with_suffix("").stem + ".ans.statement") - ) - self.out_path = ( - None - if self.root not in ["valid_output", "invalid_output"] - else self.in_path.with_suffix(".out") + self.in_path: Path = path + self.ans_path: Path = self.in_path.with_suffix(".ans") + self.out_path: Optional[Path] = ( + self.in_path.with_suffix(".out") + if self.root in ["valid_output", "invalid_output"] + or self.in_path.with_suffix(".out").is_file() + else None ) - # Display name: everything after data/. - self.name = str(self.short_path.with_suffix("")) - # Backwards compatibility support for `data/bad`. - if self.root == "bad": - if print_warn: - warn("data/bad is deprecated. Use data/{invalid_input,invalid_answer} instead.") - self.root = "invalid_answer" if self.ans_path.is_file() else "invalid_input" + # Display name: everything after data/. + self.name: str = str(self.short_path.with_suffix("")) - def __repr__(self): + def __repr__(self) -> str: return self.name - def with_suffix(self, ext): + def with_suffix(self, ext: str) -> Path: return self.in_path.with_suffix(ext) - def testdata_yaml_validator_args( + def test_case_yaml_args( self, - validator, # TODO #102: Fix circular import when setting type to validate.AnyValidator - bar, # TODO #102: Type should probably be ProgressBar | PrintBar or something + program: "validate.AnyValidator | visualize.AnyVisualizer", + bar: BAR_TYPE, ) -> list[str]: """ - The flags specified in testdata.yaml for the given validator applying to this testcase. + The flags specified in test_group.yaml for the given validator applying to this testcase. Returns ------- @@ -124,21 +129,17 @@ def testdata_yaml_validator_args( A nonempty list of strings, such as ["space_change_sensitive", "case_sensitive"] or ["--max_N", "50"] or even [""]. 
""" - key, name = ( - ("input_validator_args", validator.name) - if isinstance(validator, validate.InputValidator) - else ("output_validator_args", None) - ) - path = self.problem.path / "data" / self.short_path - return self.problem.get_testdata_yaml( - path, - cast(Literal["input_validator_args", "output_validator_args"], key), + return self.problem.get_test_case_yaml( + self.problem.path / "data" / self.short_path, + type(program).args_key, bar, - name=name, + name=program.name if isinstance(program, validate.InputValidator) else None, ) - def validator_hashes(self, cls: type["validate.AnyValidator"], bar): + def validator_hashes( + self, cls: type[validate.AnyValidator], bar: BAR_TYPE + ) -> dict[str, dict[str, str]]: """ Returns ------- @@ -146,38 +147,37 @@ def validator_hashes(self, cls: type["validate.AnyValidator"], bar): hash => - name - flags - - hash indicating which validators will be run for this testcase. """ assert cls in [validate.InputValidator, validate.AnswerValidator, validate.OutputValidator] - validators = self.problem.validators(cls) or [] + validators = self.problem.validators(cls) d = dict() for validator in validators: - flags = self.testdata_yaml_validator_args(validator, bar) - if flags is False: - continue + flags = self.test_case_yaml_args(validator, bar) flags_string = " ".join(flags) - o = { + h = combine_hashes_dict( + { + "name": validator.name, + "flags": flags_string, + "hash": validator.hash, + } + ) + d[h] = { "name": validator.name, "flags": flags_string, - "hash": validator.hash, } - h = combine_hashes_dict(o) - # Don't actually store the somewhat useless validator hash. - del o["hash"] - d[h] = o return d def validate_format( self, - mode: "validate.Mode", + mode: validate.Mode, *, - bar, - constraints=None, - warn_instead_of_error=False, + bar: ProgressBar, + constraints: Optional[validate.ConstraintsDict] = None, + warn_instead_of_error: bool = False, ) -> bool: check_constraints = constraints is not None @@ -205,7 +205,7 @@ def validate_format( warn_instead_of_error=warn_instead_of_error, ) case validate.Mode.INVALID: - assert self.root in config.INVALID_CASE_DIRECTORIES[:-1] + assert self.root in config.INVALID_CASE_DIRECTORIES ok = self.validate_format( validate.Mode.INPUT, @@ -237,7 +237,6 @@ def validate_format( warn_instead_of_error=warn_instead_of_error, ) case validate.Mode.VALID_OUTPUT: - assert self.root == "valid_output" assert not self.problem.interactive assert not self.problem.multi_pass @@ -272,25 +271,23 @@ def validate_format( def _run_validators( self, - mode: "validate.Mode", - validators, - expect_rejection, + mode: validate.Mode, + validators: Sequence[validate.AnyValidator], + expect_rejection: bool, *, - bar, - constraints=None, - warn_instead_of_error=False, + bar: ProgressBar, + constraints: Optional[validate.ConstraintsDict] = None, + warn_instead_of_error: bool = False, ) -> bool: args = [] results = [] for validator in validators: name = validator.name - if type(validator) is validate.OutputValidator and mode == validate.Mode.ANSWER: + if isinstance(validator, validate.OutputValidator) and mode == validate.Mode.ANSWER: args += ["case_sensitive", "space_change_sensitive"] name = f"{name} (ans)" - flags = self.testdata_yaml_validator_args(validator, bar) - if flags is False: - continue - flags = args if flags is None else flags + args + flags = self.test_case_yaml_args(validator, bar) + flags = flags + args ret = validator.run(self, mode=mode, constraints=constraints, args=flags) results.append(ret.status) @@ -334,7 
+331,7 @@ def _run_validators( data += ( f"{Style.RESET_ALL}-> {shorten_path(self.problem, file.parent) / file.name}\n" ) - else: + elif ret.err: data = ret.err if expect_rejection: @@ -352,7 +349,7 @@ def _run_validators( ) else: bar.part_done( - ret.status, + bool(ret.status), message, data=data, warn_instead_of_error=warn_instead_of_error, @@ -395,11 +392,23 @@ def _run_validators( bar.error(msg, resume=True) else: success = all(results) - if success and mode in [validate.Mode.INPUT, validate.Mode.ANSWER]: - validate.sanity_check( - self.problem, - self.in_path if mode == validate.Mode.INPUT else self.ans_path, - bar, - ) + if success: + main_path: Optional[Path] = None + if mode == validate.Mode.INPUT: + main_path = self.in_path + elif mode == validate.Mode.ANSWER: + main_path = self.ans_path + elif mode == validate.Mode.VALID_OUTPUT and self.root not in [ + "valid_output", + "invalid_output", + ]: + main_path = self.out_path + + if main_path is not None: + validate.sanity_check( + self.problem, + main_path, + bar, + ) return success diff --git a/bin/tools.py b/bin/tools.py index b6b9a792a..7ec91d275 100755 --- a/bin/tools.py +++ b/bin/tools.py @@ -24,12 +24,14 @@ import colorama import re +from colorama import Style from pathlib import Path -from typing import Literal, cast +from typing import cast, Literal, Optional # Local imports import config import constraints +import contest import export import generate import fuzz @@ -39,11 +41,11 @@ import solve_stats import download_submissions import stats +import upgrade import validate import signal from problem import Problem -import contest from contest import * from util import * @@ -135,7 +137,9 @@ def parse_problems_yaml(problemlist): def fallback_problems(): problem_paths = list(filter(is_problem_directory, glob(Path("."), "*/"))) - label = chr(ord("Z") - len(problem_paths) + 1) if contest_yaml().get("testsession") else "A" + label = ( + chr(ord("Z") - len(problem_paths) + 1) if contest_yaml().get("test_session") else "A" + ) problems = [] for path in problem_paths: problems.append((path, label)) @@ -177,45 +181,74 @@ def fallback_problems(): if len(problems) == 0: fatal("Did not find problem.yaml. Are you running this from a problem directory?") - if config.args.order: + if config.args.order or contest_yaml().get("order"): + order = config.args.order or contest_yaml()["order"] + # Sort by position of id in order def get_pos(id): - if id in config.args.order: - return config.args.order.index(id) + if id in order: + return order.index(id) else: - return len(config.args.order) + 1 + return len(order) problems.sort(key=lambda p: (get_pos(p.label), p.label)) if config.args.order_from_ccs: # Sort by increasing difficulty, extracted from the CCS api. - # Get active contest. + class ProblemStat: + def __init__(self): + self.solved = 0 + self.submissions = 0 + self.pending = 0 + self.teams_submitted = 0 + self.teams_pending = 0 + + def update(self, team_stats: dict[str, Any]): + if team_stats["solved"]: + self.solved += 1 + if team_stats["num_judged"]: + self.submissions += team_stats["num_judged"] + self.teams_submitted += 1 + if team_stats["num_pending"]: + self.pending += team_stats["num_pending"] + self.teams_pending += 1 + + def key(self) -> tuple[int, int]: + # self.solved more AC => easier + # possible tie breakers: + # self.submissions more needed to get the same number of AC => Harder + # self.teams_pending more teams tried => appeared easier + # TODO: consider more stats? 
+ return (-self.solved, self.submissions) + # Get active contest. cid = get_contest_id() - solves = dict() # Read set of problems contest_problems = call_api_get_json(f"/contests/{cid}/problems?public=true") assert isinstance(problems, list) - for path in contest_problems: - solves[path["id"]] = 0 + + problem_stats = {problem["id"]: ProblemStat() for problem in contest_problems} scoreboard = call_api_get_json(f"/contests/{cid}/scoreboard?public=true") for team in scoreboard["rows"]: - for path in team["problems"]: - if path["solved"]: - solves[path["problem_id"]] += 1 - - # Convert away from defaultdict, so any non matching keys below raise an error. - solves = dict(solves) - verbose("solves: " + str(solves)) + for team_stats in team["problems"]: + problem_stats[team_stats["problem_id"]].update(team_stats) # Sort the problems - # Use negative solves instead of reversed, to preserver stable order. - problems.sort(key=lambda p: (-solves[p.name], p.label)) - order = ", ".join(map(lambda p: str(p.label), problems)) - verbose("order: " + order) + problems.sort(key=lambda p: (problem_stats[p.name].key(), p.label)) + verbose(f"order: {', '.join(map(lambda p: str(p.label), problems))}") + + if ask_variable_bool("Update order in contest.yaml"): + if has_ryaml: + contest_yaml_path = Path("contest.yaml") + data = contest_yaml() + data["order"] = [p.label for p in problems] + write_yaml(data, contest_yaml_path) + log("Updated order") + else: + error("ruamel.yaml library not found. Update the order manually.") contest_name = Path().cwd().name @@ -342,15 +375,20 @@ def build_parser(): action="store_true", help="Copy the output pdf instead of symlinking it.", ) - global_parser.add_argument( - "--language", dest="languages", action="append", help="Set language." - ) + global_parser.add_argument("--lang", nargs="+", help="Languages to include.") subparsers = parser.add_subparsers( title="actions", dest="action", parser_class=SuppressingParser ) subparsers.required = True + # upgrade + subparsers.add_parser( + "upgrade", + parents=[global_parser], + help="Upgrade a problem or contest.", + ) + # New contest contestparser = subparsers.add_parser( "new_contest", @@ -601,21 +639,25 @@ def build_parser(): ) genparser.add_argument( "--no-validators", + default=False, action="store_true", help="Ignore results of input and answer validation. Validators are still run.", ) genparser.add_argument( "--no-solution", + default=False, action="store_true", help="Skip generating .ans/.interaction files with the solution.", ) genparser.add_argument( "--no-visualizer", + default=False, action="store_true", help="Skip generating graphics with the visualizer.", ) genparser.add_argument( "--no-testcase-sanity-checks", + default=False, action="store_true", help="Skip sanity checks on testcases.", ) @@ -657,6 +699,12 @@ def build_parser(): action="store_true", help="Do not run `generate` before running submissions.", ) + runparser.add_argument( + "--visualizer", + dest="no_visualizer", + action="store_false", + help="Also run the output visualizer.", + ) runparser.add_argument( "--all", "-a", @@ -727,6 +775,9 @@ def build_parser(): timelimitparser.add_argument( "--timeout", "-t", type=int, help="Override the default timeout. Default: 60." ) + timelimitparser.add_argument( + "--no-generate", "-G", action="store_true", help="Do not run `generate`." 
+ ) # Test testparser = subparsers.add_parser( @@ -806,12 +857,22 @@ def build_parser(): action="store_true", help="Make a zip more following the kattis problemarchive.com format.", ) + zipparser.add_argument( + "--legacy", + action="store_true", + help="Make a zip more following the legacy format.", + ) zipparser.add_argument("--no-solutions", action="store_true", help="Do not compile solutions") # Build a zip with all samples. - subparsers.add_parser( + samplezipparser = subparsers.add_parser( "samplezip", parents=[global_parser], help="Create zip file of all samples." ) + samplezipparser.add_argument( + "--legacy", + action="store_true", + help="Make a zip more following the legacy format.", + ) gitlab_parser = subparsers.add_parser( "gitlabci", parents=[global_parser], help="Print a list of jobs for the given contest." @@ -848,6 +909,11 @@ def build_parser(): action="store", help="Contest ID to use when writing to the API. Defaults to value of contest_id in contest.yaml.", ) + exportparser.add_argument( + "--legacy", + action="store_true", + help="Make export more following the legacy format.", + ) updateproblemsyamlparser = subparsers.add_parser( "update_problems_yaml", @@ -863,6 +929,16 @@ def build_parser(): action="store_true", help="Sort the problems by id.", ) + updateproblemsyamlparser.add_argument( + "--number", + action="store_true", + help="Use Sxx as problem labels.", + ) + updateproblemsyamlparser.add_argument( + "--legacy", + action="store_true", + help="Make problems.yaml more following the legacy format.", + ) # Print the corresponding temporary directory. tmpparser = subparsers.add_parser( @@ -941,6 +1017,11 @@ def run_parsed_arguments(args): else: config.args.testcases = [] + # upgrade commands. + if action == "upgrade": + upgrade.upgrade() + return + # Skel commands. if action == "new_contest": skel.new_contest() @@ -953,6 +1034,15 @@ def run_parsed_arguments(args): # Get problem_paths and cd to contest problems, level, contest, tmpdir = get_problems() + # Check non unique uuid + # TODO: check this even more globally? + uuids: dict[str, Problem] = {} + for p in problems: + if p.settings.uuid in uuids: + warn(f"{p.name} has the same uuid as {uuids[p.settings.uuid].name}") + else: + uuids[p.settings.uuid] = p + # Check for incompatible actions at the problem/problemset level. if level != "problem": if action == "test": @@ -1021,8 +1111,8 @@ def run_parsed_arguments(args): sampleout = Path("samples.zip") if level == "problem": sampleout = problems[0].path / sampleout - statement_language = export.force_single_language(problems) - export.build_samples_zip(problems, sampleout, statement_language) + languages = export.select_languages(problems) + export.build_samples_zip(problems, sampleout, languages) return if action == "rename_problem": @@ -1083,7 +1173,7 @@ def run_parsed_arguments(args): if action in ["generate"]: success &= generate.generate(problem) - if action in ["all", "constraints", "run"] and not config.args.no_generate: + if action in ["all", "constraints", "run", "time_limit"] and not config.args.no_generate: # Call `generate` with modified arguments. 
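# --- Illustrative sketch (not part of this patch) ---------------------------
# The lines below take a snapshot of config.args, override a few fields for the
# nested `generate` call, and restore the snapshot afterwards. The idiom in
# isolation (attribute names here are placeholders):
import argparse

args = argparse.Namespace(jobs=4, verbose=1)
snapshot = argparse.Namespace(**vars(args))  # shallow copy of every attribute
args.jobs = 1                                # temporary override for the nested call
# ... run the nested action here ...
args = snapshot                              # restore the original settings
# -----------------------------------------------------------------------------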
old_args = argparse.Namespace(**vars(config.args)) config.args.jobs = (os.cpu_count() or 1) // 2 @@ -1162,10 +1252,16 @@ def run_parsed_arguments(args): config.args = old_args if not config.args.kattis: - # Make sure that all problems use the same language for the PDFs - export.force_single_language(problems) - success &= latex.build_problem_pdfs(problem) + if not config.args.no_solutions: + success &= latex.build_problem_pdfs( + problem, build_type=latex.PdfType.SOLUTION + ) + + if problem.path.glob(str(latex.PdfType.PROBLEM_SLIDE.path("*"))): + success &= latex.build_problem_pdfs( + problem, build_type=latex.PdfType.PROBLEM_SLIDE + ) if not config.args.force: success &= problem.validate_data(validate.Mode.INPUT, constraints={}) @@ -1179,10 +1275,8 @@ def run_parsed_arguments(args): print(file=sys.stderr) if action in ["export"]: - # Add contest PDF for only one language to DOMjudge - statement_language = export.force_single_language(problems) - - export.export_contest_and_problems(problems, statement_language) + languages = export.select_languages(problems) + export.export_contest_and_problems(problems, languages) if level == "problemset": print(f"{Style.BRIGHT}CONTEST {contest}{Style.RESET_ALL}", file=sys.stderr) @@ -1210,50 +1304,53 @@ def run_parsed_arguments(args): ) if action in ["zip"]: - statement_language = None + languages = [] if not config.args.kattis: - # Add contest/solutions PDF for only one language to the zip file - statement_language = export.force_single_language(problems) + languages = export.select_languages(problems) - success &= latex.build_contest_pdfs(contest, problems, tmpdir, statement_language) - success &= latex.build_contest_pdfs( - contest, problems, tmpdir, statement_language, web=True - ) - if not config.args.no_solutions: - success &= latex.build_contest_pdf( - contest, - problems, - tmpdir, - statement_language, - build_type=latex.PdfType.SOLUTION, - ) - success &= latex.build_contest_pdf( - contest, - problems, - tmpdir, - statement_language, - build_type=latex.PdfType.SOLUTION, - web=True, - ) # Only build the problem slides if at least one problem has the TeX for it - if any( - glob(problem.path / "problem_statement", "problem-slide.*.tex") - for problem in problems - ): - success &= latex.build_contest_pdf( - contest, - problems, - tmpdir, - statement_language, - build_type=latex.PdfType.PROBLEM_SLIDE, + slideglob = latex.PdfType.PROBLEM_SLIDE.path("*") + build_problem_slides = any( + problem.path.glob(str(slideglob)) for problem in problems + ) + + for language in languages: + success &= latex.build_contest_pdfs(contest, problems, tmpdir, language) + success &= latex.build_contest_pdfs( + contest, problems, tmpdir, language, web=True ) - else: - log("No problem has problem-slide.*.tex, skipping problem slides") + if not config.args.no_solutions: + success &= latex.build_contest_pdf( + contest, + problems, + tmpdir, + language, + build_type=latex.PdfType.SOLUTION, + ) + success &= latex.build_contest_pdf( + contest, + problems, + tmpdir, + language, + build_type=latex.PdfType.SOLUTION, + web=True, + ) + if build_problem_slides: + success &= latex.build_contest_pdf( + contest, + problems, + tmpdir, + language, + build_type=latex.PdfType.PROBLEM_SLIDE, + ) + + if not build_problem_slides: + log(f"No problem has {slideglob.name}, skipping problem slides") outfile = contest + ".zip" if config.args.kattis: outfile = contest + "-kattis.zip" - export.build_contest_zip(problems, problem_zips, outfile, statement_language) + export.build_contest_zip(problems, 
problem_zips, outfile, languages) if action in ["update_problems_yaml"]: export.update_problems_yaml( @@ -1314,9 +1411,12 @@ def interrupt_handler(sig, frame): # Will likely only work on linux os.environ["MALLOC_PERTURB_"] = str(0b01011001) - parser = build_parser() - parser.set_defaults(**read_personal_config()) - run_parsed_arguments(parser.parse_args()) + try: + parser = build_parser() + parser.set_defaults(**read_personal_config()) + run_parsed_arguments(parser.parse_args()) + except AbortException: + fatal("Running interrupted") if __name__ == "__main__": diff --git a/bin/upgrade.py b/bin/upgrade.py new file mode 100644 index 000000000..7ef2ac1d6 --- /dev/null +++ b/bin/upgrade.py @@ -0,0 +1,632 @@ +import config +import generate +import shlex +from collections import defaultdict +from util import * +from validate import InputValidator, AnswerValidator, OutputValidator + +import secrets +import shutil +from pathlib import Path +from typing import Any, cast + +if has_ryaml: + # TODO #102 The conditional import in util.py isn't picked up properly + from ruamel.yaml.comments import CommentedMap, CommentedSeq + + +# src_base must be a dir (or symlink to dir) +# dst_base must not exists +# the parents of dst_base must exist +def _move_dir(src_base: Path, dst_base: Path) -> None: + assert src_base.is_dir() + assert not dst_base.exists() + + src_base = src_base.absolute() + dst_base = dst_base.absolute() + base = [a for a, b in zip(reversed(src_base.parents), reversed(dst_base.parents)) if a == b][-1] + + def resolve_up(parts: tuple[str, ...]) -> Path: + resolved: list[str] = [] + for part in parts: + if part == ".": + continue + if part == ".." and len(resolved) and resolved[-1] != "..": + resolved.pop() + else: + resolved.append(part) + return Path(*resolved) + + def movetree(src: Path, dst: Path) -> None: + if src.is_symlink(): + # create a new symlink and make sure that the destination is handled properly + destination = src.readlink() + if destination.is_absolute(): + # absolute links should stay absolute + # if their destination is inside the dir we move we have to change it + if destination.is_relative_to(src_base): + destination = dst_base / destination.relative_to(src_base) + dst.symlink_to(destination) + src.unlink() + else: + if resolve_up(src.parent.parts + destination.parts).is_relative_to(src_base): + # the link is relative and points to another file we move + src.rename(dst) + else: + # the link is relative but points to a fixed place + src_rel = src.parent.relative_to(base) + dst_rel = dst.parent.relative_to(base) + parts = (("..",) * len(dst_rel.parts)) + src_rel.parts + destination.parts + dst.symlink_to(resolve_up(parts)) + src.unlink() + elif src.is_dir(): + # recursively move stuff inside dirs + dst.mkdir() + for file in [*src.iterdir()]: + movetree(file, dst / file.name) + # delete now empty dir + src.rmdir() + else: + # move file + src.rename(dst) + + movetree(src_base, dst_base) + + +def args_split(args: str): + splitted = CommentedSeq(shlex.split(args)) + splitted.fa.set_flow_style() + return splitted + + +def upgrade_contest_yaml(contest_yaml_path: Path, bar: ProgressBar) -> None: + yaml_data = read_yaml(contest_yaml_path) + if "testsession" in yaml_data: + ryaml_replace(yaml_data, "testsession", "test_session") + write_yaml(yaml_data, contest_yaml_path) + bar.log("renaming 'testsession' to 'test_session'") + + +def upgrade_data(problem_path: Path, bar: ProgressBar) -> None: + rename = [ + ("data/invalid_inputs", "data/invalid_input"), + ("data/invalid_answers", 
"data/invalid_answer"), + ("data/invalid_outputs", "data/invalid_output"), + ("data/valid_outputs", "data/valid_output"), + ] + for old_name, new_name in rename: + old_path = problem_path / old_name + new_path = problem_path / new_name + if old_path.is_dir(): + if new_path.exists(): + bar.error(f"can't rename '{old_name}', '{new_name}' already exists", resume=True) + continue + bar.log(f"renaming '{old_name}' to '{new_name}'") + old_path.rename(new_path) + + # Move test cases in 'bad' to either 'invalid_input' or 'invalid_answer', whichever applies + + def rename_testcase(old_base: Path, new_dir: Path) -> None: + new_dir.mkdir(parents=True, exist_ok=True) + new_base = new_dir / old_base.name + for ext in config.KNOWN_TEXT_DATA_EXTENSIONS: + old_path = old_base.with_suffix(ext) + new_path = new_base.with_suffix(ext) + if old_path.is_file(): + old_rel_path, new_rel_path = [ + p.relative_to(problem_path) for p in (old_path, new_path) + ] + if new_path.exists(): + bar.error( + f"can't rename '{old_rel_path}', '{new_rel_path}' already exists", + resume=True, + ) + continue + bar.log(f"renaming '{old_rel_path}' to '{new_rel_path}'") + old_path.rename(new_path) + + bad_dir = problem_path / "data" / "bad" + for file in bad_dir.glob("*.in"): + if file.with_suffix(".ans").is_file(): + rename_testcase(file, problem_path / "data" / "invalid_answer") + else: + rename_testcase(file, problem_path / "data" / "invalid_input") + if bad_dir.is_dir() and not any(bad_dir.iterdir()): + bad_dir.rmdir() + + # Move .hint and .desc files to the Test Case Configuration .yaml file + + test_case_yamls = defaultdict[Path, CommentedMap](CommentedMap) + for f in (problem_path / "data").rglob("*.yaml"): + if f.with_suffix(".in").exists(): # Prevent reading test_group.yaml, which has no *.in file + test_case_yamls[f] = read_yaml(f) + + for f in (problem_path / "data").rglob("*.desc"): + test_case_yaml = test_case_yamls[f.with_suffix(".yaml")] + if "description" in test_case_yaml: + bar.warn(f"can't move '{f}' to '*.yaml', it already contains the key 'description'") + else: + bar.log(f"moving '{f}' to 'description' key in '*.yaml'") + test_case_yaml["description"] = f.read_text() + write_yaml(test_case_yaml, f.with_suffix(".yaml")) + f.unlink() + + for f in (problem_path / "data").rglob("*.hint"): + test_case_yaml = test_case_yamls[f.with_suffix(".yaml")] + if "hint" in test_case_yaml: + bar.warn(f"can't move '{f}' to '*.yaml', it already contains the key 'hint'") + else: + bar.log(f"moving '{f}' to 'hint' key in '*.yaml'") + test_case_yaml["hint"] = f.read_text() + write_yaml(test_case_yaml, f.with_suffix(".yaml")) + f.unlink() + + +def rename_testdata_to_test_group_yaml(problem_path: Path, bar: ProgressBar) -> None: + for f in (problem_path / "data").rglob("testdata.yaml"): + new_name = f.with_name("test_group.yaml") + rename_log = f"'{f.relative_to(problem_path)}' to '{new_name.relative_to(problem_path)}'" + if new_name.exists(): + bar.error(f"can't rename {rename_log}, target already exists", resume=True) + continue + bar.log(f"renaming {rename_log}") + f.rename(new_name) + + +def upgrade_test_group_yaml(problem_path: Path, bar: ProgressBar) -> None: + rename = [ + ("output_validator_flags", OutputValidator.args_key), + ("input_validator_flags", InputValidator.args_key), + ] + + for f in (problem_path / "data").rglob("test_group.yaml"): + data = cast(CommentedMap, read_yaml(f)) + + for old, new in rename: + if old in data: + if new in data: + bar.error( + f"can't change '{old}', '{new}' already exists in 
{f.relative_to(problem_path)}", + resume=True, + ) + continue + ryaml_replace(data, old, new) + + if new in data and isinstance(data[new], str): + data[new] = args_split(data[new]) + + write_yaml(data, f) + + +def upgrade_generators_yaml(problem_path: Path, bar: ProgressBar) -> None: + generators_yaml = problem_path / "generators" / "generators.yaml" + if not generators_yaml.is_file(): + return + yaml_data = read_yaml(generators_yaml) + if yaml_data is None or not isinstance(yaml_data, dict): + return + + changed = False + + if "visualizer" in yaml_data: + warn( + "Cannot automatically upgrade 'visualizer'.\n - move visualizer to 'input_visualizer/'\n - first argument is the in_file\n - second argument is the ans_file" + ) + + if "data" in yaml_data and isinstance(yaml_data["data"], dict): + data = cast(CommentedMap, yaml_data["data"]) + + rename = [ + ("invalid_inputs", "invalid_input"), + ("invalid_answers", "invalid_answer"), + ("invalid_outputs", "invalid_output"), + ("valid_outputs", "valid_output"), + ] + for old_name, new_name in rename: + if old_name in data: + if new_name in data: + bar.error( + f"can't rename 'data.{old_name}', 'data.{new_name}' already exists in generators.yaml", + resume=True, + ) + continue + bar.log(f"renaming 'data.{old_name}' to 'data.{new_name}' in generators.yaml") + ryaml_replace(data, old_name, new_name) + changed = True + + # this breaks comments... but that is fine + if "bad" in data: + + def move_testcase(name: str, value: Any, new_parent: str) -> None: + parent = ryaml_get_or_add(data, new_parent) + if "data" not in parent: + parent[data] = CommentedSeq + parent = parent["data"] + new_name = name + if isinstance(parent, list): + parent.append(CommentedMap()) + parent[-1][new_name] = value + else: + if new_name in parent: + new_name = f"bad_{new_name}" + if new_name in parent: + new_name = f"{new_name}_{secrets.token_hex(6)}" + assert new_name not in parent + parent[new_name] = value + bar.log(f"renaming 'bad.{name}' to '{new_parent}.{new_name}' in generators.yaml") + + bad = data["bad"] + if "data" in bad and bad["data"]: + children = bad["data"] if isinstance(bad["data"], list) else [bad["data"]] + for dictionary in children: + for child_name, child_data in sorted(dictionary.items()): + if "ans" in child_data: + move_testcase(child_name, child_data, "invalid_answer") + else: + move_testcase(child_name, child_data, "invalid_input") + + ryaml_filter(data, "bad") + changed = True + + def apply_recursively( + operation: Callable[[dict[str, Any], str], bool], data: dict[str, Any], path="" + ) -> bool: + changed = operation(data, path) + if "data" in data and data["data"]: + children = data["data"] if isinstance(data["data"], list) else [data["data"]] + for dictionary in children: + for child_name, child_data in sorted(dictionary.items()): + if not child_name: + child_name = '""' + if generate.is_directory(child_data): + changed |= apply_recursively(operation, child_data, path + "." 
+ child_name) + return changed + + def rename_testdata_to_test_group_yaml(data: dict[str, Any], path: str) -> bool: + old, new = "testdata.yaml", "test_group.yaml" + if old in data: + print_path = f" ({path[1:]})" if len(path) > 1 else "" + bar.log(f"changing '{old}' to '{new}' in generators.yaml{print_path}") + ryaml_replace(data, old, new) + return True + return False + + def upgrade_generated_test_group_yaml(data: dict[str, Any], path: str) -> bool: + changed = False + if "test_group.yaml" in data: + test_group_yaml = cast(CommentedMap, data["test_group.yaml"]) + print_path = f" ({path[1:]})" if len(path) > 1 else "" + + rename = [ + ("output_validator_flags", OutputValidator.args_key), + ("input_validator_flags", InputValidator.args_key), + ] + for old, new in rename: + if old in test_group_yaml: + if new in test_group_yaml: + bar.error( + f"can't change '{old}', '{new}' already exists in generators.yaml{print_path}", + resume=True, + ) + continue + bar.log(f"changing '{old}' to '{new}' in generators.yaml{print_path}") + ryaml_replace(test_group_yaml, old, new) + changed = True + if new in test_group_yaml and isinstance(test_group_yaml[new], str): + test_group_yaml[new] = args_split(test_group_yaml[new]) + changed = True + return changed + + def replace_hint_desc_in_test_cases(data: dict[str, Any], path: str) -> bool: + changed = False + if "data" in data and data["data"]: + children = data["data"] if isinstance(data["data"], list) else [data["data"]] + for dictionary in children: + for child_name, child_data in sorted(dictionary.items()): + if not child_name: + child_name = '""' + if child_data and generate.is_testcase(child_data): + if "desc" in child_data: + ryaml_get_or_add(child_data, "yaml")["description"] = child_data["desc"] + ryaml_filter(child_data, "desc") + bar.log( + f"moving 'desc' inside 'yaml' in generators.yaml ({path}.{child_name})" + ) + changed = True + if "hint" in child_data: + ryaml_get_or_add(child_data, "yaml")["hint"] = child_data["hint"] + ryaml_filter(child_data, "hint") + bar.log( + f"moving 'hint' inside 'yaml' in generators.yaml ({path}.{child_name})" + ) + changed = True + return changed + + changed |= apply_recursively(rename_testdata_to_test_group_yaml, yaml_data, "") + changed |= apply_recursively(upgrade_generated_test_group_yaml, yaml_data, "") + changed |= apply_recursively(replace_hint_desc_in_test_cases, yaml_data, "") + + if changed: + write_yaml(yaml_data, generators_yaml) + + +def upgrade_statement(problem_path: Path, bar: ProgressBar) -> None: + if (problem_path / "problem_statement").is_dir(): + if (problem_path / "statement").exists(): + bar.error("can't rename 'problem_statement/', 'statement/' already exists", resume=True) + else: + bar.log("renaming 'problem_statement/' to 'statement/'") + (problem_path / "problem_statement").rename(problem_path / "statement") + + origin = problem_path / "statement" + move = [ + ("solution*", "solution"), + ("problem-slide*", "problem_slide"), + ] + for glob, dest_name in move: + dest_path = problem_path / dest_name + if dest_path.exists() and not dest_path.is_dir(): + bar.error(f"'{dest_name}' is not a directory", resume=True) + continue + + for f in origin.glob(glob): + dest = dest_path / f.relative_to(origin) + if dest.exists(): + bar.error( + f"can't move '{f.relative_to(problem_path)}', '{dest.relative_to(problem_path)}' already exists", + resume=True, + ) + continue + bar.log(f"moving '{f.relative_to(problem_path)}' to '{dest.relative_to(problem_path)}'") + dest_path.mkdir(parents=True, 
exist_ok=True) + shutil.move(f, dest) + + +def upgrade_format_validators(problem_path: Path, bar: ProgressBar) -> None: + rename = [ + ("input_format_validators", InputValidator.source_dir), + ("answer_format_validators", AnswerValidator.source_dir), + ] + for old_name, new_name in rename: + old_path = problem_path / old_name + new_path = problem_path / new_name + if old_path.is_dir(): + if new_path.exists(): + bar.error(f"can't rename '{old_name}', '{new_name}' already exists", resume=True) + continue + bar.log(f"renaming '{old_name}' to '{new_name}'") + old_path.rename(new_path) + + +def upgrade_output_validators(problem_path: Path, bar: ProgressBar) -> None: + if (problem_path / "output_validators").is_dir(): + if (problem_path / OutputValidator.source_dir).exists(): + bar.error( + f"can't rename 'output_validators/', '{OutputValidator.source_dir}/' already exists", + resume=True, + ) + return + content = [*(problem_path / "output_validators").iterdir()] + if len(content) == 1 and content[0].is_dir(): + bar.log( + f"renaming 'output_validators/{content[0].name}' to '{OutputValidator.source_dir}/'" + ) + _move_dir(content[0], problem_path / OutputValidator.source_dir) + else: + bar.log(f"renaming 'output_validators/' to '{OutputValidator.source_dir}/'") + (problem_path / "output_validators").rename(problem_path / OutputValidator.source_dir) + + +def upgrade_problem_yaml(problem_path: Path, bar: ProgressBar) -> None: + assert (problem_path / "problem.yaml").exists() + data = cast(CommentedMap, read_yaml(problem_path / "problem.yaml")) + + if ( + "problem_format_version" not in data + or data["problem_format_version"] != config.SPEC_VERSION + ): + bar.log("set 'problem_format_version' in problem.yaml") + data.insert(0, "problem_format_version", config.SPEC_VERSION) + + if "validation" in data: + if "type" in data: + bar.error( + "can't change 'validation', 'type' already exists in problem.yaml", resume=True + ) + else: + bar.log("change 'validation' to 'type' in problem.yaml") + type = CommentedSeq() + if "interactive" in data["validation"]: + type.append("interactive") + if "multi-pass" in data["validation"]: + type.append("multi-pass") + if not type: + type.append("pass-fail") + # "type" comes before "name" in the spec + pos = list(data.keys()).index("name") if "name" in data else 0 + data.insert(pos, "type", type if len(type) > 1 else type[0]) + ryaml_filter(data, "validation") + + if "author" in data: + if "credits" in data: + bar.error( + "can't change 'author', 'credits' already exists in problem.yaml", resume=True + ) + else: + bar.log("change 'author' to 'credits.authors' in problem.yaml") + authors = CommentedSeq( + name.strip() for name in data["author"].replace("and", ",").split(",") + ) + credits = CommentedMap({"authors": authors if len(authors) > 1 else authors[0]}) + ryaml_replace(data, "author", "credits", credits) + + if "source_url" in data: + if "source" not in data: + ryaml_replace(data, "source_url", "source") + elif data["source"]: + bar.log("change 'source_url' to 'source.url' in problem.yaml") + old_pos = list(data.keys()).index("source") + old_source = ryaml_filter(data, "source") + old_source_url = ryaml_filter(data, "source_url") + data.insert( + old_pos, "source", CommentedMap({"name": old_source, "url": old_source_url}) + ) + else: + bar.log("remove empty 'source(_url)' in problem.yaml") + ryaml_filter(data, "source") + ryaml_filter(data, "source_url") + + if "limits" in data: + limits = data["limits"] + if "time_multiplier" in limits or "time_safety_margin" in 
limits: + if "time_multipliers" in limits: + bar.error( + "can't change 'limits.time_multiplier/limits.time_safety_margin', 'limits.time_multipliers' already exists in problem.yaml", + resume=True, + ) + else: + bar.log( + "change 'limits.time_multiplier/limits.time_safety_margin' to 'limits.time_multipliers'" + ) + time_multipliers = CommentedMap() + + if "time_multiplier" in limits: + if limits["time_multiplier"] != 2: # Skip if it's equal to the new default + time_multipliers["ac_to_time_limit"] = limits["time_multiplier"] + ryaml_filter(limits, "time_multiplier") + + if "time_safety_margin" in limits: + if limits["time_safety_margin"] != 1.5: # Skip if it's equal to the new default + time_multipliers["time_limit_to_tle"] = limits["time_safety_margin"] + ryaml_filter(limits, "time_safety_margin") + + if time_multipliers: + limits["time_multipliers"] = time_multipliers + # If both time multipliers are default, remove the comments (this only works if + # there are no other limits configured, but that's the most common case anyway) + if not limits: + ryaml_filter(data, "limits") + + def add_args(new_data: dict[str, Any]) -> bool: + if OutputValidator.args_key in new_data: + bar.error( + f"can't change 'validator_flags', '{OutputValidator.args_key}' already exists in test_group.yaml", + resume=True, + ) + return False + bar.log(f"change 'validator_flags' to '{OutputValidator.args_key}' in test_group.yaml") + validator_flags = data["validator_flags"] + new_data[OutputValidator.args_key] = ( + args_split(validator_flags) if isinstance(validator_flags, str) else validator_flags + ) + ryaml_filter(data, "validator_flags") + return True + + if "validator_flags" in data: + if data["validator_flags"]: + generators_path = problem_path / "generators" / "generators.yaml" + if generators_path.exists(): + generators_data = cast(CommentedMap, read_yaml(generators_path)) + + if "test_group.yaml" not in generators_data: + if "data" in generators_data: + # insert before data + pos = list(generators_data.keys()).index("data") + generators_data.insert(pos, "test_group.yaml", CommentedMap()) + else: + # insert at end + generators_data["test_group.yaml"] = CommentedMap() + if add_args(generators_data["test_group.yaml"]): + write_yaml(generators_data, generators_path) + else: + test_group_path = problem_path / "data" / "test_group.yaml" + test_group_data = ( + cast(CommentedMap, read_yaml(test_group_path)) + if test_group_path.exists() + else CommentedMap() + ) + + if add_args(test_group_data): + write_yaml(test_group_data, test_group_path) + else: + ryaml_filter(data, "validator_flags") + + timelimit_path = problem_path / ".timelimit" + if timelimit_path.is_file(): + if "limits" not in data: + data["limits"] = CommentedMap() + if "time_limit" in data["limits"]: + bar.error( + "can't change '.timelimit' file, 'limits.time_limit' already exists in problem.yaml", + resume=True, + ) + else: + bar.log("change '.timelimit' file to 'limits.time_limit' in problem.yaml") + data["limits"]["time_limit"] = float(timelimit_path.read_text()) + timelimit_path.unlink() + + domjudge_path = problem_path / "domjudge-problem.ini" + if domjudge_path.is_file(): + time_limit = None + for line in domjudge_path.read_text().splitlines(): + key, var = map(str.strip, line.strip().split("=")) + if (var[0] == '"' or var[0] == "'") and (var[-1] == '"' or var[-1] == "'"): + var = var[1:-1] + if key == "timelimit": + time_limit = float(var) + if time_limit is not None: + if "limits" not in data: + data["limits"] = CommentedMap() + if 
"time_limit" in data["limits"]: + bar.error( + "can't change 'domjudge-problem.ini' file, 'limits.time_limit' already exists in problem.yaml", + resume=True, + ) + else: + bar.log("change 'domjudge-problem.ini' file to 'limits.time_limit' in problem.yaml") + data["limits"]["time_limit"] = time_limit + domjudge_path.unlink() + + write_yaml(data, problem_path / "problem.yaml") + + +def _upgrade(problem_path: Path, bar: ProgressBar) -> None: + bar.start(problem_path) + + upgrade_data(problem_path, bar) + rename_testdata_to_test_group_yaml(problem_path, bar) + upgrade_test_group_yaml(problem_path, bar) + upgrade_generators_yaml(problem_path, bar) + upgrade_statement(problem_path, bar) + upgrade_format_validators(problem_path, bar) + upgrade_output_validators(problem_path, bar) + upgrade_problem_yaml(problem_path, bar) + + bar.done() + + +def upgrade() -> None: + if not has_ryaml: + error("upgrade needs the ruamel.yaml python3 library. Install python[3]-ruamel.yaml.") + return + cwd = Path().cwd() + + def is_problem_directory(path: Path) -> bool: + return (path / "problem.yaml").is_file() + + if is_problem_directory(cwd): + paths = [cwd] + else: + paths = [p for p in cwd.iterdir() if is_problem_directory(p)] + + bar = ProgressBar("upgrade", items=["contest.yaml", *paths]) + + bar.start("contest.yaml") + if (cwd / "contest.yaml").is_file(): + upgrade_contest_yaml(cwd / "contest.yaml", bar) + bar.done() + + for path in paths: + _upgrade(path, bar) + + bar.finalize() diff --git a/bin/util.py b/bin/util.py index 06e146b4d..aa0e03fed 100644 --- a/bin/util.py +++ b/bin/util.py @@ -4,6 +4,7 @@ import errno import hashlib import os +import re import secrets import shutil import signal @@ -13,12 +14,11 @@ import threading import time from enum import Enum -from collections.abc import Sequence -from collections.abc import Callable +from collections.abc import Callable, Mapping, Sequence from pathlib import Path from typing import ( - Any, cast, + Any, Iterable, Literal, NoReturn, @@ -46,6 +46,7 @@ ryaml.default_flow_style = False ryaml.indent(mapping=2, sequence=4, offset=2) ryaml.width = sys.maxsize + ryaml.preserve_quotes = True except Exception: has_ryaml = False @@ -58,6 +59,21 @@ ruamel_lock = threading.Lock() +try: + import questionary + from prompt_toolkit.document import Document + + has_questionary = True + + class EmptyValidator(questionary.Validator): + def validate(self, document: Document) -> None: + if len(document.text) == 0: + raise questionary.ValidationError(message="Please enter a value") + +except Exception: + has_questionary = False + + def is_windows() -> bool: return sys.platform in ["win32", "cygwin"] @@ -119,7 +135,9 @@ def error(msg: Any) -> None: config.n_error += 1 -def fatal(msg: Any, *, force: bool = threading.active_count() > 1) -> NoReturn: +def fatal(msg: Any, *, force: Optional[bool] = None) -> NoReturn: + if force is None: + force = threading.active_count() > 1 print(f"\n{Fore.RED}FATAL ERROR: {msg}{Style.RESET_ALL}", file=sys.stderr) exit1(force) @@ -168,7 +186,7 @@ def message( # A simple bar that only holds a task prefix class PrintBar: - def __init__(self, task: str | Path): + def __init__(self, task: Optional[str | Path] = None): self.task = task def log(self, msg: Any, item: Optional[ITEM_TYPE] = None) -> None: @@ -584,6 +602,9 @@ def finalize( return self.global_logged and not suppress_newline +BAR_TYPE = PrintBar | ProgressBar + + # Given a command line argument, return the first match: # - absolute # - relative to the 'type' directory for the current problem @@ 
-640,6 +661,13 @@ def path_size(path: Path) -> int: return sum(f.stat().st_size for f in path.rglob("*") if f.exists()) +def drop_suffix(path: Path, suffixes: Sequence[str]) -> Path: + for suffix in suffixes: + if path.name.endswith(suffix): + return path.with_name(path.name.removesuffix(suffix)) + return path + + # Drops the first two path components // def print_name(path: Path, keep_type: bool = False) -> str: return str(Path(*path.parts[1 if keep_type else 2 :])) @@ -709,6 +737,45 @@ def ryaml_get_or_add( assert isinstance(value, t) return value # type: ignore + # This tries to preserve the correct comments. + def ryaml_filter(data: Any, remove: str) -> Any: + assert isinstance(data, ruamel.yaml.comments.CommentedMap) + remove_index = list(data.keys()).index(remove) + if remove_index == 0: + return data.pop(remove) + + curr = data + prev_key = list(data.keys())[remove_index - 1] + while isinstance(curr[prev_key], list | dict) and len(curr[prev_key]): + # Try to remove the comment from the last element in the preceding list/dict + curr = curr[prev_key] + if isinstance(curr, list): + prev_key = len(curr) - 1 + else: + prev_key = list(curr.keys())[-1] + + if remove in data.ca.items: + # Move the comment that belongs to the removed key (which comes _after_ the removed key) + # to the preceding key + curr.ca.items[prev_key] = data.ca.items.pop(remove) + elif prev_key in curr.ca.items: + # If the removed key does not have a comment, + # the comment after the previous key should be removed + curr.ca.items.pop(prev_key) + + return data.pop(remove) + + # Insert a new key before an old key, then remove the old key. + # If new_value is not given, the default is to simply rename the old key to the new key. + def ryaml_replace(data: Any, old_key: str, new_key: str, new_value: Any = None) -> None: + assert isinstance(data, ruamel.yaml.comments.CommentedMap) + if new_value is None: + new_value = data[old_key] + data.insert(list(data.keys()).index(old_key), new_key, new_value) + data.pop(old_key) + if old_key in data.ca.items: + data.ca.items[new_key] = data.ca.items.pop(old_key) + # Only allow one thread to write at the same time. Else, e.g., generating test cases in parallel goes wrong. write_yaml_lock = threading.Lock() @@ -726,7 +793,7 @@ def write_yaml( exit(1) if path is None: return yamllib.dump(data) - with open(path, "w") as stream: + with path.open("w") as stream: yamllib.dump(data, stream) return None with write_yaml_lock: @@ -764,7 +831,7 @@ def parse_optional_setting(yaml_data: dict[str, Any], key: str, t: type[T]) -> O if isinstance(value, int) and t is float: value = float(value) if isinstance(value, t): - return cast(T, value) + return value if value == "" and (t is list or t is dict): # handle empty yaml keys return t() @@ -772,9 +839,15 @@ def parse_optional_setting(yaml_data: dict[str, Any], key: str, t: type[T]) -> O return None -def parse_setting(yaml_data: dict[str, Any], key: str, default: T) -> T: +def parse_setting( + yaml_data: dict[str, Any], key: str, default: T, constraint: Optional[str] = None +) -> T: value = parse_optional_setting(yaml_data, key, type(default)) - return default if value is None else value + result = default if value is None else value + if constraint and not eval(f"{result} {constraint}"): + warn(f"value for '{key}' in problem.yaml should be {constraint} but is {value}. 
SKIPPED.") + return default + return result def parse_optional_list_setting(yaml_data: dict[str, Any], key: str, t: type[T]) -> list[T]: @@ -788,11 +861,86 @@ def parse_optional_list_setting(yaml_data: dict[str, Any], key: str, t: type[T]) f"some values for key '{key}' in problem.yaml do not have type {t.__name__}. SKIPPED." ) return [] + if not value: + warn(f"value for '{key}' in problem.yaml should not be an empty list.") return value warn(f"incompatible value for key '{key}' in problem.yaml. SKIPPED.") return [] +def parse_deprecated_setting( + yaml_data: dict[str, Any], key: str, new: Optional[str] = None +) -> None: + if key in yaml_data: + use = f", use '{new}' instead" if new else "" + warn(f"key '{key}' is deprecated{use}. SKIPPED.") + yaml_data.pop(key) + + +def _ask_variable(name: str, default: Optional[str] = None, allow_empty: bool = False) -> str: + if config.args.defaults: + if not default and not allow_empty: + fatal(f"{name} has no default") + return default or "" + while True: + val = input(f"{name}: ") + val = val or default or "" + if val != "" or allow_empty: + return val + + +def ask_variable_string(name: str, default: Optional[str] = None, allow_empty: bool = False) -> str: + if has_questionary: + try: + validate = None if allow_empty else EmptyValidator + return cast( + str, + questionary.text(name + ":", default=default or "", validate=validate).unsafe_ask(), + ) + except KeyboardInterrupt: + fatal("Running interrupted") + else: + text = f" ({default})" if default else "" + return _ask_variable(name + text, default if default else "", allow_empty) + + +def ask_variable_bool(name: str, default: bool = True) -> bool: + if has_questionary: + try: + return cast( + bool, + questionary.confirm(name + "?", default=default, auto_enter=False).unsafe_ask(), + ) + except KeyboardInterrupt: + fatal("Running interrupted") + else: + text = " (Y/n)" if default else " (y/N)" + return _ask_variable(name + text, "Y" if default else "N").lower()[0] == "y" + + +def ask_variable_choice(name: str, choices: Sequence[str], default: Optional[str] = None) -> str: + if has_questionary: + try: + plain = questionary.Style([("selected", "noreverse")]) + return cast( + str, + questionary.select( + name + ":", choices=choices, default=default, style=plain + ).unsafe_ask(), + ) + except KeyboardInterrupt: + fatal("Running interrupted") + else: + default = default or choices[0] + text = f" ({default})" if default else "" + while True: + got = _ask_variable(name + text, default if default else "") + if got in choices: + return got + else: + warn(f"unknown option: {got}") + + # glob, but without hidden files def glob(path: Path, expression: str, include_hidden: bool = False) -> list[Path]: def keep(p: Path) -> bool: @@ -847,74 +995,120 @@ def strip_newline(s: str) -> str: # When output is True, copy the file when args.cp is true. -def ensure_symlink(link: Path, target: Path, output: bool = False, relative: bool = False) -> None: - # on windows copy if necessary - if is_windows() and not windows_can_symlink: - if link.exists() or link.is_symlink(): - link.unlink() - shutil.copyfile(target, link) - return +def ensure_symlink(link: Path, target: Path, output: bool = False, relative: bool = False) -> bool: + try: + # on windows copy if necessary + if is_windows() and not windows_can_symlink: + if link.exists() or link.is_symlink(): + link.unlink() + shutil.copyfile(target, link) + return True - # For output files: copy them on Windows, or when --cp is passed. 
- if output and config.args.cp: - if link.exists() or link.is_symlink(): - link.unlink() - shutil.copyfile(target, link) - return + # For output files: copy them on Windows, or when --cp is passed. + if output and config.args.cp: + if link.exists() or link.is_symlink(): + link.unlink() + shutil.copyfile(target, link) + return True - # Do nothing if link already points to the right target. - if link.is_symlink() and link.resolve() == target.resolve(): - is_absolute = os.readlink(link) - if not relative and is_absolute: - return - # if relative and not is_absolute: return + # Do nothing if link already points to the right target. + if link.is_symlink() and link.resolve() == target.resolve(): + is_absolute = os.readlink(link) + if not relative and is_absolute: + return True + # if relative and not is_absolute: return - if link.is_symlink() or link.exists(): - if link.is_dir() and not link.is_symlink(): - shutil.rmtree(link) + if link.is_symlink() or link.exists(): + if link.is_dir() and not link.is_symlink(): + shutil.rmtree(link) + else: + link.unlink() + + # for windows the symlink needs to know if it points to a directory or file + if relative: + # Rewrite target to be relative to link. + # Use os.path.relpath instead of Path.relative_to for non-subdirectories. + link.symlink_to(os.path.relpath(target, link.parent), target.is_dir()) else: - link.unlink() + link.symlink_to(target.resolve(), target.is_dir()) + return True + except (FileNotFoundError, FileExistsError): + # this must be a race condition + return False - # for windows the symlink needs to know if it points to a directory or file - if relative: - # Rewrite target to be relative to link. - # Use os.path.relpath instead of Path.relative_to for non-subdirectories. - link.symlink_to(os.path.relpath(target, link.parent), target.is_dir()) - else: - link.symlink_to(target.resolve(), target.is_dir()) + +def has_substitute( + inpath: Path, pattern: re.Pattern[str] = config.BAPCTOOLS_SUBSTITUTE_REGEX +) -> bool: + try: + data = inpath.read_text() + except UnicodeDecodeError: + return False + return pattern.search(data) is not None -def substitute(data: str, variables: Optional[dict[str, Optional[str]]]) -> str: +def substitute( + data: str, + variables: Optional[Mapping[str, Optional[str]]], + *, + pattern: re.Pattern[str] = config.BAPCTOOLS_SUBSTITUTE_REGEX, + bar: BAR_TYPE = PrintBar(), +) -> str: if variables is None: - return data - for key, value in variables.items(): - data = data.replace("{%" + key + "%}", str(value or "")) - return data + variables = {} + + def substitute_function(match: re.Match[str]) -> str: + name = match.group(1) + if name in variables: + return str(variables[name]) if variables[name] is not None else "" + else: + variable = match.group() + bar.warn(f"Found pattern '{variable}' but no substitution was provided. 
Skipped.") + return variable + + return pattern.sub(substitute_function, data) def copy_and_substitute( - inpath: Path, outpath: Path, variables: Optional[dict[str, Optional[str]]] + inpath: Path, + outpath: Path, + variables: Optional[Mapping[str, Optional[str]]], + *, + pattern: re.Pattern[str] = config.BAPCTOOLS_SUBSTITUTE_REGEX, + bar: BAR_TYPE = PrintBar(), ) -> None: try: data = inpath.read_text() except UnicodeDecodeError: # skip this file - log(f'File "{inpath}" is not a text file.') + bar.log(f'File "{inpath}" is not a text file.') return - data = substitute(data, variables) + data = substitute(data, variables, pattern=pattern, bar=bar) if outpath.is_symlink(): outpath.unlink() outpath.write_text(data) -def substitute_file_variables(path: Path, variables: Optional[dict[str, Optional[str]]]) -> None: - copy_and_substitute(path, path, variables) +def substitute_file_variables( + path: Path, + variables: Optional[Mapping[str, Optional[str]]], + *, + pattern: re.Pattern[str] = config.BAPCTOOLS_SUBSTITUTE_REGEX, + bar: BAR_TYPE = PrintBar(), +) -> None: + copy_and_substitute(path, path, variables, pattern=pattern, bar=bar) -def substitute_dir_variables(dirname: Path, variables: Optional[dict[str, Optional[str]]]) -> None: +def substitute_dir_variables( + dirname: Path, + variables: Optional[Mapping[str, Optional[str]]], + *, + pattern: re.Pattern[str] = config.BAPCTOOLS_SUBSTITUTE_REGEX, + bar: BAR_TYPE = PrintBar(), +) -> None: for path in dirname.rglob("*"): if path.is_file(): - substitute_file_variables(path, variables) + substitute_file_variables(path, variables, pattern=pattern, bar=bar) # copies a directory recursively and substitutes {%key%} by their value in text files @@ -922,12 +1116,14 @@ def substitute_dir_variables(dirname: Path, variables: Optional[dict[str, Option def copytree_and_substitute( src: Path, dst: Path, - variables: Optional[dict[str, Optional[str]]], + variables: Optional[Mapping[str, Optional[str]]], exist_ok: bool = True, *, preserve_symlinks: bool = True, base: Optional[Path] = None, skip: Optional[Iterable[Path]] = None, + pattern: re.Pattern[str] = config.BAPCTOOLS_SUBSTITUTE_REGEX, + bar: BAR_TYPE = PrintBar(), ) -> None: if base is None: base = src @@ -955,6 +1151,8 @@ def copytree_and_substitute( preserve_symlinks=preserve_symlinks, base=base, skip=skip, + pattern=pattern, + bar=bar, ) except OSError as why: errors.append((srcFile, dstFile, str(why))) @@ -966,11 +1164,11 @@ def copytree_and_substitute( raise Exception(errors) elif dst.exists(): - warn(f'File "{dst}" already exists, skipping...') + bar.warn(f'File "{dst}" already exists, skipping...') else: try: data = src.read_text() - data = substitute(data, variables) + data = substitute(data, variables, pattern=pattern, bar=bar) dst.write_text(data) except UnicodeDecodeError: # Do not substitute for binary files. 
@@ -1048,6 +1246,14 @@ def limit_setter( group: Optional[int] = None, cores: Literal[False] | list[int] = False, ) -> Callable[[], None]: + if memory_limit: + assert command is not None + jvm = Path(command[0]).name in ["java", "javac", "kotlin", "kotlinc"] + + if group is not None: + assert not is_windows() + assert not is_mac() + def setlimits() -> None: if timeout: resource.setrlimit(resource.RLIMIT_CPU, (timeout + 1, timeout + 1)) @@ -1058,18 +1264,14 @@ def setlimits() -> None: resource.RLIMIT_STACK, (resource.RLIM_INFINITY, resource.RLIM_INFINITY) ) - if memory_limit: - assert command is not None - if Path(command[0]).name not in ["java", "javac", "kotlin", "kotlinc"] and not is_bsd(): - resource.setrlimit( - resource.RLIMIT_AS, - (memory_limit * 1024 * 1024, memory_limit * 1024 * 1024), - ) + if memory_limit and not jvm and not is_bsd(): + resource.setrlimit( + resource.RLIMIT_AS, + (memory_limit * 1024 * 1024, memory_limit * 1024 * 1024), + ) # TODO: with python 3.11 it is better to use Popen(process_group=group) if group is not None: - assert not is_windows() - assert not is_mac() os.setpgid(0, group) if cores is not False and not is_windows() and not is_bsd(): @@ -1119,6 +1321,10 @@ def _try_wait(self, wait_flags: int) -> tuple[int, int]: return (pid, sts) +class AbortException(Exception): + pass + + def default_exec_code_map(returncode: int) -> ExecStatus: if returncode == 0: return ExecStatus.ACCEPTED @@ -1183,12 +1389,14 @@ def exec_command( memory = None process: Optional[ResourcePopen] = None + old_handler = None def interrupt_handler(sig: Any, frame: Any) -> None: nonlocal process if process is not None: process.kill() - fatal("Running interrupted", force=True) + if callable(old_handler): + old_handler(sig, frame) if threading.current_thread() is threading.main_thread(): old_handler = signal.signal(signal.SIGINT, interrupt_handler) @@ -1227,10 +1435,7 @@ def interrupt_handler(sig: Any, frame: Any) -> None: # -2 corresponds to SIGINT, i.e. keyboard interrupt / CTRL-C. 
if process.returncode == -2: - if threading.current_thread() is threading.main_thread(): - fatal("Running interrupted") - else: - raise ChildProcessError() + raise AbortException() def maybe_crop(s: str) -> str: return crop_output(s) if crop else s @@ -1267,7 +1472,7 @@ def combine_hashes(values: Sequence[str]) -> str: return hasher.hexdigest() -def combine_hashes_dict(d: dict[str, Optional[str]]) -> str: +def combine_hashes_dict(d: Mapping[str, Optional[str]]) -> str: hasher = hashlib.sha512(usedforsecurity=False) for key, value in d.items(): hasher.update(key.encode()) @@ -1287,7 +1492,7 @@ def hash_file_content(file: Path, buffer_size: int = 65536) -> str: raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), str(file)) sha = hashlib.sha512(usedforsecurity=False) - with open(file, "rb") as f: + with file.open("rb") as f: while True: data = f.read(buffer_size) if not data: @@ -1305,7 +1510,7 @@ def hash_file(file: Path, buffer_size: int = 65536) -> str: sha.update(len(name).to_bytes(8, "big")) sha.update(name) - with open(file, "rb") as f: + with file.open("rb") as f: while True: data = f.read(buffer_size) if not data: diff --git a/bin/validate.py b/bin/validate.py index 15f18cf62..c3c8e9128 100644 --- a/bin/validate.py +++ b/bin/validate.py @@ -2,10 +2,15 @@ from util import * from enum import Enum from collections.abc import Sequence -from typing import Final +from pathlib import Path +from typing import Final, Optional, TYPE_CHECKING +import config import program -import testcase + +if TYPE_CHECKING: # Prevent circular import: https://stackoverflow.com/a/39757388 + import run + import testcase class Mode(Enum): @@ -105,6 +110,7 @@ def __init__( "memory": problem.limits.validation_memory, }, skip_double_build_warning=skip_double_build_warning, + substitute_constants=True, ) assert self.__class__ is not Validator # Validator is abstract and may not be instantiated @@ -168,7 +174,7 @@ def format_exec_code_map(returncode): return ExecStatus.ERROR if self.language == "checktestdata": - with main_path.open() as main_file: + with main_path.open("rb") as main_file: return self._exec_command( self.run_command, exec_code_map=format_exec_code_map, @@ -203,10 +209,10 @@ def _exec_helper(self, *args, cwd, **kwargs): def run( self, - testcase: testcase.Testcase, - mode, + testcase: "testcase.Testcase", + mode: Mode, constraints: Optional[ConstraintsDict] = None, - args=None, + args: Optional[list[str]] = None, ) -> ExecResult: raise Exception("Abstract method") @@ -220,19 +226,21 @@ class InputValidator(Validator): Also supports checktestdata and viva files, with different invocation. 
""" - def __init__(self, problem, path, **kwargs): - super().__init__(problem, path, "input_validators", **kwargs) + validator_type: Final[str] = "input" - validator_type = "input" + source_dir: Final[str] = "input_validators" - source_dirs = ["input_validators", "input_format_validators"] + args_key: Final[str] = "input_validator_args" + + def __init__(self, problem, path, **kwargs): + super().__init__(problem, path, InputValidator.source_dir, **kwargs) def run( self, - testcase, - mode=Mode.INPUT, + testcase: "testcase.Testcase", + mode: Mode = Mode.INPUT, constraints: Optional[ConstraintsDict] = None, - args=None, + args: Optional[list[str]] = None, ) -> ExecResult: """ Arguments @@ -257,7 +265,7 @@ def run( invocation = self.run_command.copy() - with testcase.in_path.open() as in_file: + with testcase.in_path.open("rb") as in_file: ret = self._exec_helper( invocation + arglist, exec_code_map=validator_exec_code_map, @@ -273,26 +281,29 @@ def run( class AnswerValidator(Validator): """ - Validate the default answer file (such as "testcase.ans"), called as: + Validate the default answer file "testcase.ans" (or "testcase.out" if it exists), called as: ./validator input < answer. Also supports checktestdata and viva files, with different invocation. """ - def __init__(self, problem, path, **kwargs): - super().__init__(problem, path, "answer_validators", **kwargs) + validator_type: Final[str] = "answer" - validator_type = "answer" + source_dir: Final[str] = "answer_validators" - source_dirs = ["answer_validators", "answer_format_validators"] + # use output_validator_args as well + args_key: Final[str] = "output_validator_args" + + def __init__(self, problem, path, **kwargs): + super().__init__(problem, path, AnswerValidator.source_dir, **kwargs) def run( self, - testcase, - mode=Mode.ANSWER, + testcase: "testcase.Testcase", + mode: Mode = Mode.ANSWER, constraints: Optional[ConstraintsDict] = None, - args=None, + args: Optional[list[str]] = None, ) -> ExecResult: assert self.run_command is not None, "Validator should be built before running it" @@ -310,7 +321,7 @@ def run( invocation = self.run_command + [testcase.in_path.resolve()] - with testcase.ans_path.open() as ans_file: + with testcase.ans_path.open("rb") as ans_file: ret = self._exec_helper( invocation + arglist, exec_code_map=validator_exec_code_map, @@ -331,20 +342,21 @@ class OutputValidator(Validator): ./validator input answer feedbackdir [arguments from problem.yaml] < output """ - def __init__(self, problem, path, **kwargs): - super().__init__(problem, path, "output_validators", **kwargs) + validator_type: Final[str] = "output" - validator_type = "output" + source_dir: Final[str] = "output_validator" - # TODO #424: We should not support multiple output validators inside output_validator/. - source_dirs = ["output_validator", "output_validators"] + args_key: Final[str] = "output_validator_args" + + def __init__(self, problem, path, **kwargs): + super().__init__(problem, path, OutputValidator.source_dir, **kwargs) def run( self, - testcase, # TODO #102: fix type errors after setting type to Testcase - mode, # TODO #102: fix type errors after setting type to Mode | run.Run + testcase: "testcase.Testcase", + mode: "Mode | run.Run", constraints: Optional[ConstraintsDict] = None, - args=None, + args: Optional[list[str]] = None, ) -> ExecResult: """ Run this validator on the given testcase. 
@@ -353,7 +365,7 @@ def run( --------- mode: either a run.Run (namely, when validating submission output) or a Mode - (namely, when validation a testcase) + (namely, when validating a testcase) Returns ------- @@ -363,7 +375,7 @@ def run( assert self.run_command is not None, "Validator should be built before running it" if mode == Mode.INPUT: - raise ValueError("OutputValidator do not support Mode.INPUT") + raise ValueError("OutputValidator does not support Mode.INPUT") in_path = testcase.in_path.resolve() ans_path = testcase.ans_path.resolve() @@ -374,18 +386,16 @@ def run( raise ValueError( "OutputValidator in Mode.INVALID should only be run for data/invalid_output" ) + assert testcase.out_path is not None path = testcase.out_path.resolve() elif mode == Mode.VALID_OUTPUT: - if testcase.root != "valid_output": - raise ValueError( - "OutputValidator in Mode.VALID_OUTPUT should only be run for data/valid_output" - ) + assert testcase.out_path is not None path = testcase.out_path.resolve() else: assert mode != Mode.INPUT # mode is actually a Run path = mode.out_path - in_path = mode.in_path + in_path = mode.in_path # relevant for multipass if self.language in Validator.FORMAT_VALIDATOR_LANGUAGES: raise ValueError("Invalid output validator language") @@ -395,7 +405,7 @@ def run( cwd = mode.feedbackdir invocation = self.run_command + [in_path, ans_path, cwd] - with path.open() as file: + with path.open("rb") as file: ret = self._exec_helper( invocation + arglist, exec_code_map=validator_exec_code_map, @@ -456,31 +466,49 @@ def sanity_check(problem, path, bar, strict_whitespace=True): if not path.exists(): fatal(f"{path} not found during sanity check") - return - with open(path, "rb") as file: - name = { - ".in": "Input", - ".ans": "Answer", - ".out": "Output", - }[path.suffix] - file_bytes = file.read() - if _has_invalid_byte(file_bytes, other_whitespaces=not strict_whitespace): - bar.warn(f"{name} contains unexpected characters but was accepted!") - elif len(file_bytes) == 0: + + name = { + ".in": "Input", + ".ans": "Answer", + ".out": "Output", + }[path.suffix] + + file_bytes = path.read_bytes() + + if len(file_bytes) == 0: + # only allow empty files for multipass .ans + if not (path.suffix == ".ans" and problem.multi_pass): bar.warn(f"{name} is empty but was accepted!") - elif len(file_bytes) > 20_000_000: - bar.warn(f"{name} is larger than 20MB!") - elif ( - path.suffix in [".ans", ".out"] - and len(file_bytes) > problem.limits.output * 1024 * 1024 - ): + else: + # enforce empty .ans file for interactive + if problem.interactive and path.suffix == ".ans": + bar.warn(f"use empty .ans file for {problem.settings.type_name()} problem") + return # Since the .ans file MUST be empty, the other sanity checks can be skipped. + + # check file size limits + # TODO: consider time limit? 
+ file_size_limit = 20 # in MiB + inMiB = 1024 * 1024 + if len(file_bytes) > file_size_limit * inMiB: + bar.warn(f"{name} is larger than {file_size_limit}MiB!") + + # check output limits + if path.suffix in [".ans", ".out"]: + if len(file_bytes) > problem.limits.output * inMiB: + new_limit = (len(file_bytes) + inMiB - 1) // inMiB bar.warn( - f"{name} exceeds output limit (set limits->output to at least {(len(file_bytes) + 1024 * 1024 - 1) // 1024 // 1024}MiB in problem.yaml)" + f"{name} exceeds output limit (set limits->output to at least {new_limit}MiB in problem.yaml)" ) - elif strict_whitespace: - if file_bytes[0] in [ord(" "), ord("\n")]: - bar.warn(f"{name} starts with whitespace but was accepted!") - elif file_bytes[-1] != ord("\n"): - bar.warn(f"{name} does not end with a newline but was accepted!") - elif _has_consecutive_whitespaces(file_bytes): - bar.warn(f"{name} contains consecutive whitespace characters but was accepted!") + elif 2 * len(file_bytes) > problem.limits.output * inMiB: + bar.warn(f"{name} is close to output limit (you should consider doubling it)") + + # check content + if _has_invalid_byte(file_bytes, other_whitespaces=not strict_whitespace): + bar.warn(f"{name} contains unexpected characters but was accepted!") + if strict_whitespace and len(file_bytes) > 0: + if file_bytes[0] in [ord(" "), ord("\n")]: + bar.warn(f"{name} starts with whitespace but was accepted!") + if file_bytes[-1] != ord("\n"): + bar.warn(f"{name} does not end with a newline but was accepted!") + if _has_consecutive_whitespaces(file_bytes): + bar.warn(f"{name} contains consecutive whitespace characters but was accepted!") diff --git a/bin/validator_tests.py b/bin/validator_tests.py index 7e10fc945..c2a724dac 100644 --- a/bin/validator_tests.py +++ b/bin/validator_tests.py @@ -49,7 +49,7 @@ def decorator(func: T) -> T: # constant testcases register("latin-1")("Naïve") - register("empty")("") + register("empty", [InputValidator, OutputValidator])("") register("newline")("\n") register("fixed_random")("YVRtr&*teTsRjs8ZC2%kN*T63V@jJq!d") register("not_printable_ascii")("\x7f") diff --git a/bin/verdicts.py b/bin/verdicts.py index 34f422d3f..fb988457b 100644 --- a/bin/verdicts.py +++ b/bin/verdicts.py @@ -4,7 +4,7 @@ import threading from enum import Enum from pathlib import Path -from typing import Literal +from typing import Literal, TYPE_CHECKING from colorama import Fore, Style @@ -12,9 +12,12 @@ import testcase from util import ProgressBar +if TYPE_CHECKING: + import run + class Verdict(Enum): - """The verdict of a testcase or testgroup""" + """The verdict of a test case or test group""" ACCEPTED = 1 WRONG_ANSWER = 2 @@ -69,7 +72,7 @@ def color(self): class RunUntil(Enum): # Run until the lexicographically first error is known. FIRST_ERROR = 1 - # Run until the lexicographically first timeout testcase is known. + # Run until the lexicographically first timeout test case is known. DURATION = 2 # Run all cases. ALL = 3 @@ -135,14 +138,14 @@ def from_string_domjudge(s: str) -> Verdict: class Verdicts: """The verdicts of a submission. - Testcases and testgroups are identified by strings. In particular, - * the testcase whose input file is 'a/b/1.in' is called 'a/b/1' - * the two topmost testgroups are 'sample', 'secret' + Test cases and test groups are identified by strings. In particular, + * the test case whose input file is 'a/b/1.in' is called 'a/b/1' + * the two topmost test groups are 'sample', 'secret' * the root is called '.' - Initialised with all testcases. 
Individual verdicts are registered + Initialised with all test cases. Individual verdicts are registered with set(), which infers verdicts upwards in the tree as they become - available (and returns the topmost inferred testgroup). + available (and returns the topmost inferred test group). Verdicts (registered and inferred) are accessed with __getitem__ >>> V = Verdicts(["a/b/1", "a/b/2", "a/c/1", "a/d/1", "b/3"], timeout=1) @@ -152,26 +155,25 @@ class Verdicts: ACCEPTED None Attributes: - - run_until: Which testcases to run. - - children[testgroup]: the lexicographically sorted list of direct children (testgroups and testcases) of the given testnode - - - verdict[testnode]: the verdict at the given testnode, or None. In particular, + - run_until: Which test cases to run. + - children[test_group]: the lexicographically sorted list of direct children (test groups and test cases) of the given test node + - verdict[test_node]: the verdict at the given test node, or None. In particular, verdict['.'] is the root verdict, sometimes called final verdict or submission verdict. Should not be directly set; use __setitem__ on the Verdict object instead. None: not computed yet. False: determined to be unneeded. - - duration[testcase]: the duration of the testcase + - duration[test_case]: the duration of the test case """ def __init__( self, - testcases_list: list[testcase.Testcase], + test_cases_list: list[testcase.Testcase], timeout: int, run_until: RunUntil = RunUntil.FIRST_ERROR, ): - testcases: set[str] = set(t.name for t in testcases_list) - testgroups: set[str] = set(str(path) for tc in testcases for path in Path(tc).parents) + test_cases: set[str] = set(t.name for t in test_cases_list) + test_groups: set[str] = set(str(path) for tc in test_cases for path in Path(tc).parents) # Lock operations reading/writing non-static data. # Private methods assume the lock is already locked when entering a public method. @@ -180,16 +182,16 @@ def __init__( self.run_until = run_until self.timeout = timeout - # (testcase | testgroup) -> Verdict | None | Literal[False] + # (test_case | test_group) -> Verdict | None | Literal[False] self.verdict: dict[str, Verdict | None | Literal[False]] = { - g: None for g in testcases | testgroups + g: None for g in test_cases | test_groups } - # testcase -> float | None - self.duration: dict[str, float | None] = {g: None for g in testcases} + # test_case -> float | None + self.duration: dict[str, float | None] = {g: None for g in test_cases} - # const testgroup -> [testgroup | testcase] - self.children: dict[str, list[str]] = {node: [] for node in testgroups} - for node in testcases | testgroups: + # const test_group -> [test_group | test_case] + self.children: dict[str, list[str]] = {node: [] for node in test_groups} + for node in test_cases | test_groups: if node != ".": parent = str(Path(node).parent) self.children[parent].append(node) @@ -203,20 +205,20 @@ def __enter__(self): def __exit__(self, *args): self.lock.__exit__(*args) - def is_testgroup(self, node: str) -> bool: - """Is the given testnode name a testgroup (rather than a testcase)? - This assumes nonempty testgroups. + def is_test_group(self, node: str) -> bool: + """Is the given test node name a test group (rather than a test case)? + This assumes nonempty test groups. """ return node in self.children - def is_testcase(self, node: str) -> bool: - """Is the given testnode name a testcase (rather than a testgroup)? - This assumes nonempty testgroups. 
+ def is_test_case(self, node: str) -> bool: + """Is the given test node name a test case (rather than a test group)? + This assumes nonempty test groups. """ return node not in self.children - def set(self, testcase: str, verdict: str | Verdict, duration: float): - """Set the verdict and duration of the given testcase (implying possibly others) + def set(self, test_case: str, verdict: str | Verdict, duration: float): + """Set the verdict and duration of the given test case (implying possibly others) verdict can be given as a Verdict or as a string using either long or short form ('ACCEPTED', 'AC', or Verdict.ACCEPTED). @@ -224,23 +226,25 @@ def set(self, testcase: str, verdict: str | Verdict, duration: float): with self: if isinstance(verdict, str): verdict = from_string(verdict) - self.duration[testcase] = duration - self._set_verdict_for_node(testcase, verdict, duration >= self.timeout) + self.duration[test_case] = duration + self._set_verdict_for_node(test_case, verdict, duration >= self.timeout) - def __getitem__(self, testnode) -> Verdict | None | Literal[False]: + def __getitem__(self, test_node) -> Verdict | None | Literal[False]: with self: - return self.verdict[testnode] + return self.verdict[test_node] - def salient_testcase(self) -> tuple[str, float]: - """The testcase most salient to the root verdict. - If self['.'] is Verdict.ACCEPTED, then this is the slowest testcase. - Otherwise, it is the lexicographically first testcase that was rejected.""" + def salient_test_case(self) -> tuple[str, float]: + """The test case most salient to the root verdict. + If self['.'] is Verdict.ACCEPTED, then this is the slowest test case. + Otherwise, it is the lexicographically first test case that was rejected.""" with self: match self["."]: case None: - raise ValueError("Salient testcase called before submission verdict determined") + raise ValueError( + "Salient test case called before submission verdict determined" + ) case Verdict.ACCEPTED: - # This implicitly assumes there is at least one testcase. + # This implicitly assumes there is at least one test case. return max( ((tc, d) for tc, d in self.duration.items() if d is not None), key=lambda x: x[1], @@ -249,14 +253,14 @@ def salient_testcase(self) -> tuple[str, float]: tc = min( tc for tc, v in self.verdict.items() - if self.is_testcase(tc) and v != Verdict.ACCEPTED + if self.is_test_case(tc) and v != Verdict.ACCEPTED ) duration = self.duration[tc] assert duration is not None return (tc, duration) - def slowest_testcase(self) -> None | tuple[str, float]: - """The slowest testcase, if all cases were run or a timeout occurred.""" + def slowest_test_case(self) -> None | tuple[str, float]: + """The slowest test case, if all cases were run or a timeout occurred.""" with self: tc, d = max( ((tc, d) for tc, d in self.duration.items() if d is not None), @@ -270,8 +274,8 @@ def slowest_testcase(self) -> None | tuple[str, float]: return tc, d - def aggregate(self, testgroup: str) -> Verdict: - """The aggregate verdict at the given testgroup. + def aggregate(self, test_group: str) -> Verdict: + """The aggregate verdict at the given test group. Computes the lexicographically first non-accepted verdict. Raises: @@ -280,29 +284,29 @@ def aggregate(self, testgroup: str) -> Verdict: [AC, None, RTE] is not (the first error cannot be determined). 
""" with self: - child_verdicts = list(self.verdict[c] for c in self.children[testgroup]) + child_verdicts = list(self.verdict[c] for c in self.children[test_group]) if all(v == Verdict.ACCEPTED for v in child_verdicts): return Verdict.ACCEPTED else: first_error = next(v for v in child_verdicts if v != Verdict.ACCEPTED) if first_error in [None, False]: raise ValueError( - f"Verdict aggregation at {testgroup} with unknown child verdicts" + f"Verdict aggregation at {test_group} with unknown child verdicts" ) assert first_error is not None assert first_error is not False return first_error - def _set_verdict_for_node(self, testnode: str, verdict: Verdict, timeout: bool): + def _set_verdict_for_node(self, test_node: str, verdict: Verdict, timeout: bool): # This assumes self.lock is already held. # Note that `False` verdicts can be overwritten if they were already started before being set to False. - if self.verdict[testnode] not in [None, False]: + if self.verdict[test_node] not in [None, False]: raise ValueError( - f"Overwriting verdict of {testnode} to {verdict} (was {self.verdict[testnode]})" + f"Overwriting verdict of {test_node} to {verdict} (was {self.verdict[test_node]})" ) - self.verdict[testnode] = verdict - if testnode != ".": - parent = str(Path(testnode).parent) + self.verdict[test_node] = verdict + if test_node != ".": + parent = str(Path(test_node).parent) # Possibly mark sibling cases as unneeded. match self.run_until: @@ -310,14 +314,14 @@ def _set_verdict_for_node(self, testnode: str, verdict: Verdict, timeout: bool): # On error, set all later siblings to False. if verdict != Verdict.ACCEPTED: for sibling in self.children[parent]: - if sibling > testnode and self.verdict[sibling] is None: + if sibling > test_node and self.verdict[sibling] is None: self.verdict[sibling] = False case RunUntil.DURATION: # On timeout, set all later siblings to False. if timeout: for sibling in self.children[parent]: - if sibling > testnode and self.verdict[sibling] is None: + if sibling > test_node and self.verdict[sibling] is None: self.verdict[sibling] = False case RunUntil.ALL: @@ -333,29 +337,29 @@ def _set_verdict_for_node(self, testnode: str, verdict: Verdict, timeout: bool): # parent verdict cannot be determined yet pass - def run_is_needed(self, testcase: str) -> bool: + def run_is_needed(self, test_case: str) -> bool: """ There are 3 modes for running cases: - default: run until the lexicographically first error is known - duration: run until the slowest case is known - all: run all cases - Testcases/groups have their verdict set to `False` as soon as it is determined they are not needed. + Test cases/groups have their verdict set to `False` as soon as it is determined they are not needed. """ with self: - if self.verdict[testcase] is not None: + if self.verdict[test_case] is not None: return False match self.run_until: case RunUntil.FIRST_ERROR: # Run only if parents do not have known verdicts yet. return all( - self.verdict[str(parent)] is None for parent in Path(testcase).parents + self.verdict[str(parent)] is None for parent in Path(test_case).parents ) case RunUntil.DURATION: # Run only if not explicitly marked as unneeded. return all( - self.verdict[str(parent)] is not False for parent in Path(testcase).parents + self.verdict[str(parent)] is not False for parent in Path(test_case).parents ) case RunUntil.ALL: # Run all cases. 
@@ -374,16 +378,16 @@ def __iter__(self): def __init__( self, submissions, - testcases: list[testcase.Testcase], + test_cases: list[testcase.Testcase], width: int = ProgressBar.columns, height: int = shutil.get_terminal_size().lines, max_name_width: int = 50, ): self.submissions: list[str] = [s.name for s in submissions] - self.testcases: list[str] = [t.name for t in testcases] - self.samples: set[str] = set(t.name for t in testcases if t.root == "sample") + self.test_cases: list[str] = [t.name for t in test_cases] + self.samples: set[str] = set(t.name for t in test_cases if t.root == "sample") self.results: list[Verdicts] = [] - self.current_testcases: set[str] = set() + self.current_test_cases: set[str] = set() self.last_printed: list[int] = [] self.width: int self.print_without_force: bool @@ -407,11 +411,11 @@ def __init__( lines = [f"{Style.DIM}{Fore.CYAN}{name}{Fore.WHITE}:"] verdicts = [] - for t, testcase in enumerate(self.testcases): + for t, test_case in enumerate(self.test_cases): if t % 10 == 0: verdicts.append(VerdictTable.Group(0, "")) verdicts[-1].length += 1 - verdicts[-1].text += "s" if testcase in self.samples else "-" + verdicts[-1].text += "s" if test_case in self.samples else "-" printed = self.name_width + 1 for length, tmp in verdicts: @@ -439,14 +443,14 @@ def __init__( def next_submission(self, verdicts: Verdicts): self.results.append(verdicts) - self.current_testcases = set() + self.current_test_cases = set() - def add_testcase(self, testcase: str): - self.current_testcases.add(testcase) + def add_test_case(self, test_case: str): + self.current_test_cases.add(test_case) - def update_verdicts(self, testcase: str, verdict: str | Verdict, duration: float): - self.results[-1].set(testcase, verdict, duration) - self.current_testcases.discard(testcase) + def update_verdicts(self, test_case: str, verdict: str | Verdict, duration: float): + self.results[-1].set(test_case, verdict, duration) + self.current_test_cases.discard(test_case) def _clear(self, *, force: bool = True): if force or self.print_without_force: @@ -466,11 +470,11 @@ def _clear(self, *, force: bool = True): self.last_printed = [] - def _get_verdict(self, s: int, testcase: str, check_sample: bool = True) -> str: + def _get_verdict(self, s: int, test_case: str, check_sample: bool = True) -> str: res = f"{Style.DIM}-{Style.RESET_ALL}" - if s < len(self.results) and self.results[s][testcase] not in [None, False]: - res = to_char(self.results[s][testcase], check_sample and testcase in self.samples) - elif s + 1 == len(self.results) and testcase in self.current_testcases: + if s < len(self.results) and self.results[s][test_case] not in [None, False]: + res = to_char(self.results[s][test_case], check_sample and test_case in self.samples) + elif s + 1 == len(self.results) and test_case in self.current_test_cases: res = Style.DIM + to_char(None) return res @@ -518,7 +522,7 @@ def _print_tree( first = True verdicts = [] for child in reversed(self.results[-1].children[node]): - if self.results[-1].is_testgroup(child): + if self.results[-1].is_test_group(child): if first: stack.append((child, indent + pipe + " ", "└─", True)) first = False @@ -603,11 +607,11 @@ def _print_table( # group verdicts in parts of length at most ten verdicts = [] - for t, testcase in enumerate(self.testcases): + for t, test_case in enumerate(self.test_cases): if t % 10 == 0: verdicts.append(VerdictTable.Group(0, "")) verdicts[-1].length += 1 - verdicts[-1].text += self._get_verdict(s, testcase) + verdicts[-1].text += self._get_verdict(s, 
test_case) for length, tmp in verdicts: if self.width >= 0 and printed + 1 + length > self.width: @@ -680,8 +684,8 @@ def _print(self, *objects, sep="", end="\n", file=sys.stderr, flush=True): print(*objects, sep=sep, end=end, file=file, flush=False) # TODO #102: item has type `str` in the base class, but type `run.Run` here. - def start(self, item): # type: ignore[override] - self.table.add_testcase(item.testcase.name) + def start(self, item: "run.Run"): # type: ignore[override] + self.table.add_test_case(item.testcase.name) return super().start(item) def done(self, success=True, message="", data="", print_item=True): diff --git a/bin/visualize.py b/bin/visualize.py new file mode 100644 index 000000000..627512bda --- /dev/null +++ b/bin/visualize.py @@ -0,0 +1,95 @@ +from pathlib import Path +from typing import Any, Final, Optional, TYPE_CHECKING + +import program + +from util import * + +if TYPE_CHECKING: # Prevent circular import: https://stackoverflow.com/a/39757388 + from problem import Problem + + +class InputVisualizer(program.Program): + """ + Visualizes a test case, called as: + + ./visualizer input answer [args] + + """ + + visualizer_type: Final[str] = "input" + + source_dir: Final[str] = "input_visualizer" + + args_key: Final[str] = "input_visualizer_args" + + def __init__(self, problem: "Problem", path: Path, **kwargs: Any): + super().__init__( + problem, + path, + InputVisualizer.source_dir, + limits={"timeout": problem.limits.visualizer_time}, + substitute_constants=True, + **kwargs, + ) + + # Run the visualizer (should create a testcase. file). + def run( + self, in_path: Path, ans_path: Path, cwd: Path, args: Optional[list[str]] = None + ) -> ExecResult: + assert self.run_command is not None, "Input Visualizer should be built before running it" + + return self._exec_command( + self.run_command + [in_path, ans_path] + (args or []), + cwd=cwd, + ) + + +class OutputVisualizer(program.Program): + """ + Visualizes the output of a submission + + ./visualizer input answer feedbackdir [args] < output + + """ + + visualizer_type: Final[str] = "output" + + source_dir: Final[str] = "output_visualizer" + + args_key: Final[str] = "output_visualizer_args" + + def __init__(self, problem: "Problem", path: Path, **kwargs: Any): + super().__init__( + problem, + path, + OutputVisualizer.source_dir, + limits={"timeout": problem.limits.visualizer_time}, + substitute_constants=True, + **kwargs, + ) + + # Run the visualizer. + # should write to feedbackdir/judgeimage. and/or feedbackdir/teamimage. + def run( + self, + in_path: Path, + ans_path: Path, + out_path: Optional[Path], + cwd: Path, + args: Optional[list[str]] = None, + ) -> ExecResult: + assert self.run_command is not None, "Output Visualizer should be built before running it" + assert (out_path is None) == self.problem.interactive, ( + "out_path should be None if and only if problem is interactive" + ) + + command = self.run_command + [in_path, ans_path, cwd] + (args or []) + if out_path is not None: + with out_path.open("rb") as out_file: + return self._exec_command(command, stdin=out_file, cwd=cwd) + else: + return self._exec_command(command, cwd=cwd) + + +AnyVisualizer = InputVisualizer | OutputVisualizer diff --git a/doc/commands.md b/doc/commands.md index 9065e6e77..7e0d27ad7 100644 --- a/doc/commands.md +++ b/doc/commands.md @@ -57,7 +57,7 @@ The flags below work for any subcommand: - `--no-bar`: Disable showing progress bars. 
This is useful when running in non-interactive contexts (such as CI jobs) or on platforms/terminals that don't handle the progress bars well. - `--error`/`-e`: show full output of failing commands using `--error`. The default is to show a short snippet only. - `--force-build`: Force rebuilding binaries instead of reusing cached version. -- `--language `: select a single language to use. `` should be a language code like `en` or `nl`. +- `--lang`: select languages to use for LaTeX commands. The languages should be specified by language codes like `en` or `nl`. # Problem development @@ -97,7 +97,7 @@ Use `bt run -v` to show results for all testcases. - The path of the `.in` file: `data/secret/1.in` - The path of the `.ans` file: `data/secret/1.ans` (any other extension also works, even if the file doesn't exist) - - The basename of the testcase: `data/secret/1` + - The base name of the testcase: `data/secret/1` - A directory: `data/secret`. In this case, all `.in` files that are (nested) in this directory will be used. Testcases must always be inside the `data` directory. Anything outside `data/` will raise an error. @@ -112,6 +112,7 @@ Use `bt run -v` to show results for all testcases. - `--overview`/`-o`: Print a live overview of the received verdicts for all submissions and testcases. If combined with `--no-bar` only the final table is printed. - `--no-testcase-sanity-checks`: when passed, all sanity checks on the testcases are skipped. You might want to set this in `.bapctools.yaml`. - `--sanitizer`: when passed, run submissions with additional sanitizer flags (currently only C++). Note that this removes all memory limits for submissions. +- `--visualizer`: when passed, run the output visualizer. ## `test` @@ -239,7 +240,7 @@ This table contains: - The problem label and shortname. - Whether `problem.yaml` and `domjudge.ini` are found. -- Whether `problem_statement/problem.en.tex` and `problem_statement/solution.tex` are found. +- Whether `statement/problem.en.tex` and `solution/solution.en.tex` are found. - Whether the problem has any `input_validators` and `output_validators`. - The number of `sample` and `secret` testcases. - The number of `accepted`, `wrong_answer`, and `time_limit_exceeded` solutions. @@ -305,7 +306,7 @@ See the [implementation notes](implementation_notes.md#constraints-checking) for **Verify testcase** -Validators that accept the `--constraints_file ` option are run on all testcases to check whether the bounds specified in the validator are actually reached by the testdata. A warning is raised when this is not the case. +Validators that accept the `--constraints_file ` option are run on all testcases to check whether the bounds specified in the validator are actually reached by the test data. A warning is raised when this is not the case. E.g. when an `input_validator` based on [headers/validation.h](../headers/validation.h) does `v.read_integer("n", 1, 1000)` (on line `7`) and the maximum value of `n` over all testcases is `999`, the following warning will be raised: ``` @@ -346,15 +347,14 @@ Settings for this contest will be asked for interactively. The following files a ``` /tmp/tmp % bt new_contest name: NWERC 2020 -subtitle []: The Northwestern European Programming Contest 2020 -dirname [nwerc2020]: -author [The NWERC 2020 jury]: -testsession? 
[n (y/n)]: n -year [2020]: -source [NWERC 2020]: -source url []: 2020.nwerc.eu -license [cc by-sa]: -rights owner [author]: +subtitle: The Northwestern European Programming Contest 2020 +dirname (nwerc2020): +author (The NWERC 2020 jury): +test session? (y/N): n +year (2020): +source url: 2020.nwerc.eu +license (cc by-sa): +rights owner (if left empty, defaults to problem author): ``` ## `new_problem` @@ -364,9 +364,13 @@ Create a new problem directory and fill it with skel files. If `problems.yaml` i ``` ~nwerc2020 % bt new_problem problem name (en): Test Problem -dirname [testproblem]: +dirname (testproblem): author: Ragnar Groot Koerkamp -validation (default/custom/custom interactive) [default]: +type (pass-fail): +source (NWERC 2020): +source url (2020.nwerc.eu): +license (cc by-sa): +rights owner (if left empty, defaults to problem author): LOG: Copying /home/philae/git/bapc/BAPCtools/skel/problem to testproblem. ``` @@ -427,7 +431,7 @@ contest_pdf_nwerc2020: - ./bt solutions --cp --no-bar --contest nwerc2020 only: changes: - - nwerc2020/testproblem/problem_statement/**/* + - nwerc2020/testproblem/statement/**/* artifacts: expire_in: 1 week @@ -570,7 +574,7 @@ When run for a contest: - Kattis needs the `input_validators` directory, while DOMjudge doesn't use this. - Kattis problem zips get an additional top level directory named after the problem shortname. - _Statements_: Kattis’s problemtools builds statement HTML (and PDF) using `problem2html` (and `problem2pdf`) rather than `bt pdf`. Problem authors should check the resulting statements after exporting to Kattis; pay attention to: - - The command `bt zip --kattis` exports `problem_statement/*` but not its subdirectories, so make sure illustrations and `\input`-ed tex sources are included. + - The command `bt zip --kattis` exports `{statement,solution}/*` but not its subdirectories, so make sure illustrations and `\input`-ed tex sources are included. - Proper images scaling in the HTML output requires explict widths, such as `\includegraphics[width=.5\textwidth]{foo.png}`. ## `export` @@ -628,7 +632,7 @@ This file should contain a list of problems, with for every problem the keys `id - `--colors`: Apply the given list of colors to the list of problems, in the same order as in `problems.yaml`. Should be a comma-separated list of colors (hash-sign is optional), e.g.: `--colors ff0000,00ff00,0000ff`. -- `--sort`: Sort the problems in `problems.yaml` and re-label them starting from `A` (or `X` if `contest.yaml` contains `testsession: True`). +- `--sort`: Sort the problems in `problems.yaml` and re-label them starting from `A` (or `X` if `contest.yaml` contains `test_session: True`). ## `tmp` diff --git a/doc/generators.md b/doc/generators.md index 15db34295..60e627b86 100644 --- a/doc/generators.md +++ b/doc/generators.md @@ -26,10 +26,8 @@ The two main object types are `directory` and `generator`. The root of `generato **Directory objects** take the following keys: -- `testdata.yaml`: Optional yaml configuration that will be copied to `testdata.yaml` in this directory. +- `test_group.yaml`: Optional yaml configuration that will be copied to `test_group.yaml` in this directory. - `solution`: Optional invocation of a solution to be used to generate `.ans` files. Set to empty to disable generating `.ans`. (Useful for e.g. the `data/samples/` directory.) This must be an absolute path relative to the problem root. -- `visualizer`: Optional invocation of a visualizer to generate visualizations for each test case in this directory. 
- This must be an absolute path relative to the problem root. Set to empty to disable. - `random_salt`: Optional string that will be prepended to each command before computing its `{seed}`. May be used to regenerate all random cases and to prevent predictable seeds. - `data`: The test cases / test groups contained in this directory. This may take two forms: - A dictionary, each key is the name of a test case/test group, and each value must be a `directory` or `generator` object. @@ -47,7 +45,7 @@ Or as a shorthand: The follwoing things should hold: - A `.in` file must be specified/generated by this -- If a `.ans` file is not specified/generated a `solution` must be provided that will be used to generate the `.ans`. For interactive Problems +- If a `.ans` file is not specified/generated, a `solution` must be provided that will be used to generate the `.ans`. For interactive or multi-pass problems, an empty `.ans` will be generated. **Root object** The root of the `generators.yaml` is a `directory` object with one optional additional key: diff --git a/doc/generators.yaml b/doc/generators.yaml index b6e13a7bd..a38be444b 100644 --- a/doc/generators.yaml +++ b/doc/generators.yaml @@ -11,24 +11,13 @@ # TOOLING: may pick a default if not specified, but should raise an error. solution: /submissions/accepted/sol.py -# The visualizer is used when no suitable image was generated already. -# This should read `testcase.in` and/or `testcase.ans` from the current working -# directory, and write `testcase.ext` for an extension in: -# .png, .jpg, .svg -# -# This must be the absolute path, starting in the problem root. -# -# TOOLING: may provide a flag to make running this optional, as it can be slow -# and usually isn't required. -visualizer: /visualizers/vis.py - # Optionally, a salt for generating the {seed} variables. Will be prepended to # the command being run. random_salt: abcd -# The top level may contain a testdata.yaml that will be written to data/ as specified. -testdata.yaml: - output_validator_args: "" +# The top level may contain a test_group.yaml that will be written to data/ as specified. +test_group.yaml: + output_validator_args: [] # We support three types of generators: # - Standalone files, like generators/a.cpp, generators/b.py, ..., which will @@ -85,19 +74,20 @@ data: "2": in: 23 foo # generates the test case input file data/2.in with contents "23 foo" - # The copy key indicates a manual testcase that will be copied - # from the given directory into the target testcase. The given directory + # The copy key indicates a manual test case that will be copied + # from the given directory into the target test case. The given directory # must not start with a /, not include an extension and will be relative to generators/. "3": copy: manual_cases/sample/3 - # Small testcases can be specified explictly: + # Small test cases can be specified explictly: "4": in: 1 0 # Values must be a strings, so `1` is wrapped in quotes. ans: "1" - desc: Right identity for addition - hint: Make sure addition with zero also works - # Use YAML multiline syntax for multiline testcases + yaml: + description: Right identity for addition + hint: Make sure addition with zero also works + # Use YAML multiline syntax for multiline test cases # The pipe | preserves newlines, but strips indentation whitespace. # See also https://yaml-multiline.info/ "5": @@ -109,14 +99,14 @@ data: 23 0 -4 - # Every testcase present in the directory must be listed. - # TOOLING: may still allow unlisted testcases and warn about them. 
+ # Every test case present in the directory must be listed. + # TOOLING: may still allow unlisted test cases and warn about them. #'6': secret: include: - # You can include other testcroups by there yaml name - - 'sample' + # You can include other test groups by their yaml name + - "sample" # This will include "1", "2", "3", "4", and "5" from sample data: # Types of generator programs. @@ -147,10 +137,10 @@ data: 11-random-3: graph seed={seed:2} # Different seed, because command isn't the same. #11-random-4: graph {seed} {seed:2} # Not allowed because the regex matches twice. 12-counted: - generate: graph {seed:3} {count} - count: 2 # generate two testcases at once + generate: graph {seed:3} {count} + count: 2 # generate two test cases at once - # No key (testcase or testgroup) may be a prefix of another key. + # No key (test case or test group) may be a prefix of another key. #01-second: graph 6 # Collision with rule 01 above. #12-counted-1: graph 7 # Collision with the first rule of 12-counted above #12-counted-2: graph 8 # Collision with the second rule of 12-counted above @@ -158,48 +148,48 @@ data: # Commands are only allowed to read and write files of the form # `testcase.`, where is a known file extension in - # .in, .ans, .hint, .desc, .png, .jpg, .svg. + # .in, .ans, .out, .yaml, .png, .jpg, .svg. # Any such written files will be saved. # # In case a generator program writes testcase.in, its stdout will be ignored. - # In case testcase.in is not created, stdout will be used as the input for the testcase. + # In case testcase.in is not created, stdout will be used as the input for the test case. # # The generator below generates and writes both testcase.in and testcase.ans, and # the optionally specified `solution:` will not be called. "13": write_in_and_ans.py - # To override the global/testgroup configuration on a per-testcase basis, - # a dictionary may be used. This allows the solution: and visualizer: keys, + # To override the global/test group configuration on a per-test-case basis, + # a dictionary may be used. This allows the solution: key, # as well as the generate: key which contains the command to execute. - 14_no_visualizer: + 14_override: generate: large_case_generator.py 1000000 solution: /generators/gnu_multi_precision.cpp - visualizer: # Empty to disable the visualizer here. random_salt: "123" # An entry must include *some* key that produces an in-file, # either by using 'in', 'copy', or 'generate' # 14_no_input_produced: # this is an error # solution: /submissions/accepted/foo.py - # desc: add two numbers - # hint: check for maxint! + # yaml: + # description: add two numbers + # hint: check for maxint! - # Introduce a testgroup. + # Introduce a test group. # The top-level `data:` key is always assumed to be a directory. hard_cases_group: - # Directories may contain a testdata.yaml that will be written as specified. - testdata.yaml: - output_validator_args: space_change_sensitive + # Directories may contain a test_group.yaml that will be written as specified. + test_group.yaml: + output_validator_args: [space_change_sensitive] - # To enable automatic numbering of testcases, data: may also contain a list of + # To enable automatic numbering of test cases, data: may also contain a list of # single-element dictionaries instead of a single dictionary. In this case, - # testcases and/or groups will be numbered in the order they appear, starting at + # test cases and/or groups will be numbered in the order they appear, starting at # 1. 
The system will determine the required number of digits to use and numbers # will be zero-padded accordingly, using a dash as separator from the given name # (when the given name is not empty). Each dictionary in the list must contain a # single item. # - # Numbering is per directory. Testcases/testgroups are ordered by the order of lists + # Numbering is per directory. Test cases/test groups are ordered by the order of lists # and alphabetical for dictionaries. data: # 15.in @@ -218,18 +208,18 @@ data: - j: tree j # 24-h - k: tree k - # When mixing testcases and testgroups within a testgroup, testgroups + # When mixing test cases and test groups within a test group, test groups # must be last. - # Testgroup numbers are always prefixed with g when they are numbered. - # g1-numbered_testgroup - - numbered_testgroup: + # Test group numbers are always prefixed with g when they are numbered. + # 1-numbered_test_group + - numbered_test_group: data: # 18-c - c: tree c # 19-d - d: tree d - # g2-numbered_testgroup - - numbered_testgroup: + # 2-numbered_test_group + - numbered_test_group: data: # e e: tree e @@ -241,11 +231,11 @@ data: # 15: tree empty # 16-a: tree a # 17-b: tree b -# g1-numbered_testgroup: +# 1-numbered_test_group: # data: # 18-c: tree c # 19-d: tree d -# g2-numbered_testgroup: +# 2-numbered_test_group: # data: # e: tree e # f: tree f diff --git a/doc/implementation_notes.md b/doc/implementation_notes.md index 500970674..216d767cd 100644 --- a/doc/implementation_notes.md +++ b/doc/implementation_notes.md @@ -154,7 +154,7 @@ The following placeholders are automatically substituted in the `contest_data.te {%subtitle%} {%year%} {%author%} -{%testsession%} +{%test_session%} {%logofile%} ... @@ -163,15 +163,15 @@ The following placeholders are automatically substituted in the `contest_data.te ## Solution slides Solutions are rendered in a similar way to the contest pdf. It uses the -`problem_statement/solution.tex` files as inputs. The main difference is that +`solution/solution..tex` files as inputs. The main difference is that you can provide additional files in `/`: -- `solutions_header.xy.tex`: slides prepended to the first problem, for the +- `solutions_header..tex`: slides prepended to the first problem, for the current language. -- `solutions_footer.xy.tex`: slides appended after the last problem, for the +- `solutions_footer..tex`: slides appended after the last problem, for the current language. -The following placeholders are automatically substituted in the `solution.tex`: +The following placeholders are automatically substituted in the `solution..tex`: ``` {%problemlabel%} {%problemyamlname%} @@ -190,7 +190,7 @@ There is some special support for handling _solve stats_: post-contest data on h ``` \newcommand{\solvestatsA}{\printsolvestats{}{}{}} ``` - When this file is present, each `problem_statement/solution.tex` may use `\solvestats` to print a line like: + When this file is present, each `solution/solution..tex` may use `\solvestats` to print a line like: ``` Statistics: 15 submissions, 3 accepted, 8 unknown ``` diff --git a/doc/multiple_languages.md b/doc/multiple_languages.md index 0c6177355..3eeb2e661 100644 --- a/doc/multiple_languages.md +++ b/doc/multiple_languages.md @@ -15,18 +15,18 @@ Here, `LANG` is a two-letter language code, see It is expected that the languages keys in the metadata and statement files agree. 
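The `{%key%}` placeholders listed in the implementation notes above (such as `{%author%}` or `{%test_session%}`) are filled into the LaTeX templates by the tooling. A rough sketch of such a substitution step; the pattern and helper below are illustrative, not BAPCtools' exact code.

```python
import re

# Illustrative substitution of {%key%} placeholders in a LaTeX template.
PLACEHOLDER = re.compile(r"\{%([A-Za-z_][A-Za-z0-9_]*)%\}")


def substitute(template: str, values: dict[str, str]) -> str:
    return PLACEHOLDER.sub(lambda m: values[m.group(1)], template)


print(substitute(r"\author{{%author%}}", {"author": "The NWERC 2020 jury"}))
# \author{The NWERC 2020 jury}
```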
-The default language for BAPCtools is English, but multiple languages can be specified at various points of the tool, typically using the `--language` flag or configuration files. +The default language for BAPCtools is English, but multiple languages can be specified at various points of the tool, typically using the `--lang` flag or configuration files. ## Creating a contest In short, -1. configure `languages` in `.bapctools.yaml`. -2. add a skeleton for `problem.LANG.tex` in `skel/problem/problem_statement`. +1. configure `lang` in `.bapctools.yaml`. +2. add a skeleton for `problem.LANG.tex` in `skel/problem/statement`. -### Configure `language` +### Configure `lang` -To create a contest supporting French, Dutch, and Luxembourgish, set the configurartion key `languages` to the list `['nl', 'fr', 'lt']`. +To create a contest supporting French, Dutch, and Luxembourgish, set the configurartion key `lang` to the list `['nl', 'fr', 'lt']`. Configuration keys can be set in many ways, see **Personal configuration file** in the BAPCtools documentation, but an easy way is to create a new contest: ```sh @@ -36,7 +36,7 @@ bt new_contest and then create or extend the file `/.bapctools.yaml` with ```yaml -languages: +lang: - nl - fr - lt @@ -44,9 +44,9 @@ languages: ### Add skeleton statements -The skeleton directory for a new problem statement (see `bt skel` and `bt new_problem`) by default only supports English and will populate `/problem_statement/problem.en.tex` with a default statement. +The skeleton directory for a new problem statement (see `bt skel` and `bt new_problem`) by default only supports English and will populate `/statement/problem.en.tex` with a default statement. To support, _e.g._, German, you need to add `problem.de.tex`. -To do this automatically for each `bt new_problem`, create a problem skeleton in `/skel/problem`, and add `problem_statement/problem.de.tex`, for instance like this: +To do this automatically for each `bt new_problem`, create a problem skeleton in `/skel/problem`, and add `statement/problem.de.tex`, for instance like this: ```tex \problemname{\problemyamlname} % replaced by name['de'] from problem.yaml @@ -82,13 +82,13 @@ To create a problem, bt new_problem ``` -will look for the `languages` configuration (for instance, at contest level) and use that by default. +will look for the `lang` configuration (for instance, at contest level) and use that by default. Thus, if the contest is set up as above, you need to do nothing extra. With arguments, or outside of a contest directory, ```sh -bt new_problem --language en --language fr +bt new_problem --lang en fr ``` creates a problem with two languages, English and French. @@ -108,7 +108,7 @@ creates PDFs for every problem language statement `problem.xy.tex`. With arguments, ```sh -bt pdf --language en --language fr +bt pdf --lang en fr ``` produces PDFs for English and French. @@ -117,7 +117,7 @@ The resulting PDFs are named `/problem.xy.pdf`. ## Solution PDF -Similarly, `bt solutions [--language en --language fr]` creates +Similarly, `bt solutions [--lang en fr]` creates `/solution.xy.pdf` for the given languages, defaulting to all available `solution.xy.tex` files. @@ -129,6 +129,6 @@ a warning that they should be renamed to include the language suffix in their fi At the contest level things work similarly, and `contest.xy.pdf` and `solutions.xy.pdf` are created using `bt pdf` and `bt solutions` respectively. 
By default, only those languages `xy` are used for which -`/problem_statement/problem.xy.tex` is available for all problems in the +`/statement/problem.xy.tex` is available for all problems in the contest. Solution slides are skipped for problems without a corresponding `/problemstatement/solution.xy.tex` file. diff --git a/doc/validation.md b/doc/validation.md index 387174e7f..87f98eff1 100644 --- a/doc/validation.md +++ b/doc/validation.md @@ -10,7 +10,7 @@ Input and answer validation run on the _files_ in `data/*`; their purpose is to Output validation runs on the output of the author submissions in `submissions` (and eventually on solver submissions when the problem is hosted on a judge system); the purpose of output validation is to check correctness of _submissions_. -The testcases in `/data/sample` and `/data/secret` must pass each of input, answer, and output validation; +The test cases in `/data/sample` and `/data/secret` must pass each of input, answer, and output validation; whereas submission output must only pass output validation. @@ -18,7 +18,7 @@ whereas submission output must only pass output validation. These are some things that hold for all types of validation mentioned below. -- For each testcase, all validators of the same type are run in lexicographic order. If one +- For each test case, all validators of the same type are run in lexicographic order. If one fails, later ones are skipped. - In BAPCtools, the current working directory is always a temporary `.feedbackdir` directory. @@ -30,22 +30,22 @@ These are some things that hold for all types of validation mentioned below. - The return code must be `43` for failed validation. (Note that the spec is slightly more lenient and allows any non-`42` return code for input format validation. BAPCtools expects a code of exactly `43` when validating - invalid testcases (see below).) + invalid test cases (see below).) - For input and answer validation, the out-of-spec `--constraints-file ` flag is set when running `bt constraints`. The validator can write some - statistics on the testcase to this file. See the [implementation + statistics on the test case to this file. See the [implementation notes](implementation_notes.md#constraints-checking). - `<{input,output}_validator_args>` are either empty, or the value of the - `{input,output}_validator_args` key in the first `testdata.yaml` file that is found - in the directory (testgroup) of the current testcase or its parents. + `{input,output}_validator_args` key in the first `test_group.yaml` file that is found + in the directory (test group) of the current test case or its parents. ## Input validation `bt validate --input` -Test if the testcase input file `testcase.in` file passes the 'input validators'. Each file or +Test if the test case input file `testcase.in` file passes the 'input validators'. Each file or directory in `/input_validators/` is an input validator. -Input validators receive the testcase on standard input, as +Input validators receive the test case on standard input, as ``` input_validator [input_validator_args] < testcase.in @@ -55,18 +55,20 @@ input_validator [input_validator_args] < testcase.in `bt validate --answer` -BAPCtools allows (in fact, encourages) the validation of the `.ans`-file of each testcase. +BAPCtools allows (in fact, encourages) the validation of the `.ans`-file of each test case. As for input validation, every program in `answer_validators` is a validator, and all validators must pass. 
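To make the calling convention concrete, here is a minimal answer validator sketch, assuming the conventions documented in this file: the test case input file is the first argument, the answer file arrives on standard input (the exact invocation is shown just below), and exit code 42 accepts while 43 rejects. The specific format checked here, an integer `n` in the input followed by `n` integers in the answer, is only an example.

```python
#!/usr/bin/env python3
# Minimal answer validator sketch:
#   answer_validator /path/to/testcase.in [output_validator_args] < testcase.ans
# Exit 42 to accept the answer file, 43 to reject it.
import sys


def reject(message: str) -> None:
    print(message, file=sys.stderr)
    sys.exit(43)


with open(sys.argv[1]) as f:  # testcase.in, assumed to start with n
    n = int(f.readline())

tokens = sys.stdin.read().split()  # testcase.ans on standard input
if len(tokens) != n:
    reject(f"expected {n} integers, got {len(tokens)}")
if not all(t.lstrip("-").isdigit() for t in tokens):
    reject("answer file contains a non-integer token")
sys.exit(42)
```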
-Answer validators receive the testcase answer file on standard input, as +Answer validators receive the test case answer file on standard input, as ``` answer_validator /path/to/testcase.in [output_validator_args] < testcase.ans ``` Answer validation can be as simple as checking that standard input contains a single integer (and nothing else). -A more advanced use case would be to read an integer `n` from the testcase input file `testcase.in` file provided as the first argument, +A more advanced use case would be to read an integer `n` from the test case input file `testcase.in` file provided as the first argument, followed by verifying that the standard input contains `n` newline-separated integers. -All answer files are also checked with the output validator invoked as +BAPCtools assumes that all answer files are also valid outputs and therefore also checks that the `.ans` files pass output validation. +If this assumption is wrong, you can specify `ans_is_output: False` in `problem.yaml` (note that this option is always `False` for interactive or multi-pass problems, because these do not have a single output). +If enabled, the output validator is invoked as: ``` output_validator /path/to/testcase.in /path/to/testcase.ans /path/to/feedbackdir \ @@ -108,7 +110,7 @@ Examples: Invalid answers are test cases in `data/invalid_answer`. Such a test case consist of input and answer files (`.in` and `.ans`), just like a normal test case. The input file must pass input validation (i.e., all input validators must accept). -The testcase must fail answer validation, i.e., at least one answer validator or the output validator must reject it. +The test case must fail answer validation, i.e., at least one answer validator or the output validator must reject it. The output validator is run in strict mode, i.e., with the flags `case_sensitive` and `space_change_sensitive`; to ensure maximum conformity of answer files in the test data. diff --git a/headers/validation.h b/headers/validation.h index 4b1904fa7..25c4de183 100644 --- a/headers/validation.h +++ b/headers/validation.h @@ -10,8 +10,9 @@ // This strict checking mode is used for *.in and *.ans files. // When validating submission outputs, the checking is more lenient, // but the case_sensitive and space_change_sensitive flags can be passed -// via the output_validator_args in testdata.yaml to enable strict checking behaviour -// for submission outputs regarding case and whitespace, respectively. +// via the output_validator_args in test_group.yaml or .yaml +// to enable strict checking behaviour for submission outputs +// regarding case and whitespace, respectively. #include #include diff --git a/latex/bapc.cls b/latex/bapc.cls index 71fcf35df..3a43b34e6 100644 --- a/latex/bapc.cls +++ b/latex/bapc.cls @@ -423,6 +423,18 @@ \fi% } +%------------------------------------------------------------------------------- +% Command to include consatnts. 
+% The tooling has to define the commands \constants_{} +%------------------------------------------------------------------------------- +\newcommand{\constant}[1]{% + \ifcsname constants_#1\endcsname% + \csname constants_#1\endcsname% + \else% + \PackageError{constants}{constant{#1} is not defined}{}% + \fi% +} + %------------------------------------------------------------------------------- % The following are required for the overall layout: %------------------------------------------------------------------------------- diff --git a/latex/contest-problem-slide.tex b/latex/contest-problem-slide.tex index 235bb0784..350c05a6e 100644 --- a/latex/contest-problem-slide.tex +++ b/latex/contest-problem-slide.tex @@ -1,4 +1,4 @@ -\begingroup\graphicspath{{{%problemdir%}/problem_statement/}} +\begingroup\graphicspath{{{%problemdir%}/problem_slide/}{{%problemdir%}/statement/}} \renewcommand{\problemlabel}{{%problemlabel%}} \renewcommand{\problemyamlname}{{%problemyamlname%}} \renewcommand{\problemauthor}{{%problemauthor%}} @@ -6,7 +6,8 @@ \renewcommand{\problemforeground}{{%problemforeground%}} \renewcommand{\problemborder}{{%problemborder%}} \renewcommand{\timelimit}{{%timelimit%}} - \input{{%problemdir%}/problem_statement/problem-slide.\lang.tex} + \input{{%builddir%}/constants.tex} + \input{{%problemdir%}/problem_slide/problem-slide.\lang.tex} \renewcommand{\problemlabel}{} \renewcommand{\problemyamlname}{} \renewcommand{\problemauthor}{} diff --git a/latex/contest-problem.tex b/latex/contest-problem.tex index 50472bd37..384ede999 100644 --- a/latex/contest-problem.tex +++ b/latex/contest-problem.tex @@ -1,10 +1,11 @@ -\begingroup\graphicspath{{{%problemdir%}/problem_statement/}} +\begingroup\graphicspath{{{%problemdir%}/statement/}} \renewcommand{\problemlabel}{{%problemlabel%}} \renewcommand{\problemyamlname}{{%problemyamlname%}} \renewcommand{\problemauthor}{{%problemauthor%}} \renewcommand{\timelimit}{{%timelimit%}} \input{{%builddir%}/samples.tex} - \input{{%problemdir%}/problem_statement/problem.\lang.tex} + \input{{%builddir%}/constants.tex} + \input{{%problemdir%}/statement/problem.\lang.tex} \remainingsamples{} \renewcommand{\problemlabel}{} \renewcommand{\problemyamlname}{} diff --git a/latex/contest-solution.tex b/latex/contest-solution.tex index fd11f9a8e..4b51c9f2b 100644 --- a/latex/contest-solution.tex +++ b/latex/contest-solution.tex @@ -1,9 +1,10 @@ -\begingroup\graphicspath{{{%problemdir%}/problem_statement/}} +\begingroup\graphicspath{{{%problemdir%}/solution/}{{%problemdir%}/statement/}} \renewcommand{\problemlabel}{{%problemlabel%}} \renewcommand{\problemyamlname}{{%problemyamlname%}} \renewcommand{\problemauthor}{{%problemauthor%}} \renewcommand{\timelimit}{{%timelimit%}} - \input{{%problemdir%}/problem_statement/solution.\lang.tex} + \input{{%builddir%}/constants.tex} + \input{{%problemdir%}/solution/solution.\lang.tex} \renewcommand{\problemlabel}{} \renewcommand{\problemyamlname}{} \renewcommand{\problemauthor}{} diff --git a/latex/contest-web.tex b/latex/contest-web.tex index f7bbb7961..2c406fd9d 100644 --- a/latex/contest-web.tex +++ b/latex/contest-web.tex @@ -62,7 +62,7 @@ \input{./contest-problems.tex} \makeatletter -% An empty page at the end for non-testsession. +% An empty page at the end if the contest is not a test session. 
\if\@testsession0 \clearpage \pagestyle{empty} diff --git a/latex/contest.tex b/latex/contest.tex index 102be2e15..aebbcd94f 100644 --- a/latex/contest.tex +++ b/latex/contest.tex @@ -62,7 +62,7 @@ \input{./contest-problems.tex} \makeatletter -% An empty page at the end for non-testsession. +% An empty page at the end if the contest is not a test session. \if\@testsession0 \clearpage \pagestyle{empty} diff --git a/latex/contest_data.tex b/latex/contest_data.tex index 39d3cc07c..92a9e1a7c 100644 --- a/latex/contest_data.tex +++ b/latex/contest_data.tex @@ -3,4 +3,4 @@ \copyrightyear{{%year%}} \author{{%author%}} \newcommand{\logofile}{{%logofile%}} -{%testsession%} +{%test_session%} diff --git a/latex/lang/de.tex b/latex/lang/de.tex index 9670d6a39..07a1adbcd 100644 --- a/latex/lang/de.tex +++ b/latex/lang/de.tex @@ -1,4 +1,4 @@ -\newcommand{\langbabel}{german} +\newcommand{\langbabel}{ngerman} % bapc.cls \newcommand{\langblank}{Diese Seite wurde absichtlich leer gelassen.} diff --git a/latex/problem-slide.tex b/latex/problem-slide.tex index 409114b60..642e65363 100644 --- a/latex/problem-slide.tex +++ b/latex/problem-slide.tex @@ -1,7 +1,7 @@ \documentclass[rgb,dvipsnames,aspectratio=169,9pt,t]{beamer} \input{problem-slides-base.tex} \begin{document} -\begingroup\graphicspath{{{%problemdir%}/problem_statement/}} +\begingroup\graphicspath{{{%problemdir%}/problem_slide/}{{%problemdir%}/statement/}} \renewcommand{\problemlabel}{{%problemlabel%}} \renewcommand{\problemyamlname}{{%problemyamlname%}} \renewcommand{\problemauthor}{{%problemauthor%}} @@ -9,6 +9,7 @@ \renewcommand{\problemforeground}{{%problemforeground%}} \renewcommand{\problemborder}{{%problemborder%}} \renewcommand{\timelimit}{{%timelimit%}} - \input{{%problemdir%}/problem_statement/problem-slide.\lang.tex} + \input{{%builddir%}/constants.tex} + \input{{%problemdir%}/problem_slide/problem-slide.\lang.tex} \endgroup \end{document} diff --git a/latex/problem-slides-base.tex b/latex/problem-slides-base.tex index eeffc9ff2..4b17fe054 100644 --- a/latex/problem-slides-base.tex +++ b/latex/problem-slides-base.tex @@ -51,6 +51,14 @@ \newcommand{\fullproblemtitle}{\problemlabel: \problemyamlname} \newcommand{\problemtitle}{\problemyamlname} +\newcommand{\constant}[1]{% + \ifcsname constants_#1\endcsname% + \csname constants_#1\endcsname% + \else% + \PackageError{constants}{constant{#1} is not defined}{}% + \fi% +} + \usetheme[numbering=none,block=fill]{metropolis} \newcommand{\illustration}[3]{ diff --git a/latex/problem.tex b/latex/problem.tex index 413f4c42a..e288a1946 100644 --- a/latex/problem.tex +++ b/latex/problem.tex @@ -1,12 +1,13 @@ \documentclass{bapc} \begin{document} -\begingroup\graphicspath{{{%problemdir%}/problem_statement/}} +\begingroup\graphicspath{{{%problemdir%}/statement/}} \renewcommand{\problemlabel}{{%problemlabel%}} \renewcommand{\problemyamlname}{{%problemyamlname%}} \renewcommand{\problemauthor}{{%problemauthor%}} \renewcommand{\timelimit}{{%timelimit%}} \input{{%builddir%}/samples.tex} - \input{{%problemdir%}/problem_statement/problem.\lang.tex} + \input{{%builddir%}/constants.tex} + \input{{%problemdir%}/statement/problem.\lang.tex} \remainingsamples{} \endgroup \end{document} diff --git a/latex/solution-web.tex b/latex/solution-web.tex index 6877cb919..8aa40aec2 100644 --- a/latex/solution-web.tex +++ b/latex/solution-web.tex @@ -1,12 +1,13 @@ \documentclass[rgb,dvipsnames,aspectratio=169,9pt,t,handout]{beamer} \input{solutions-base.tex} \begin{document} 
-\begingroup\graphicspath{{{%problemdir%}/problem_statement/}} +\begingroup\graphicspath{{{%problemdir%}/solution/}{{%problemdir%}/statement/}} \renewcommand{\problemlabel}{{%problemlabel%}} \renewcommand{\problemyamlname}{{%problemyamlname%}} \renewcommand{\problemauthor}{{%problemauthor%}} \renewcommand{\timelimit}{{%timelimit%}} - \input{{%problemdir%}/problem_statement/solution.\lang.tex} + \input{{%builddir%}/constants.tex} + \input{{%problemdir%}/solution/solution.\lang.tex} \renewcommand{\problemlabel}{} \endgroup \end{document} diff --git a/latex/solution.tex b/latex/solution.tex index d92dcfba5..d1186250e 100644 --- a/latex/solution.tex +++ b/latex/solution.tex @@ -1,12 +1,13 @@ \documentclass[rgb,dvipsnames,aspectratio=169,9pt,t]{beamer} \input{solutions-base.tex} \begin{document} -\begingroup\graphicspath{{{%problemdir%}/problem_statement/}} +\begingroup\graphicspath{{{%problemdir%}/solution/}{{%problemdir%}/statement/}} \renewcommand{\problemlabel}{{%problemlabel%}} \renewcommand{\problemyamlname}{{%problemyamlname%}} \renewcommand{\problemauthor}{{%problemauthor%}} \renewcommand{\timelimit}{{%timelimit%}} - \input{{%problemdir%}/problem_statement/solution.\lang.tex} + \input{{%builddir%}/constants.tex} + \input{{%problemdir%}/solution/solution.\lang.tex} \renewcommand{\problemlabel}{} \endgroup \end{document} diff --git a/latex/solutions-base.tex b/latex/solutions-base.tex index 9071d8b53..e34ba8c3d 100644 --- a/latex/solutions-base.tex +++ b/latex/solutions-base.tex @@ -55,6 +55,14 @@ \newcommand{\fullproblemtitle}{\problemlabel: \problemyamlname} \newcommand{\problemtitle}{\problemlabel: \problemyamlname} +\newcommand{\constant}[1]{% + \ifcsname constants_#1\endcsname% + \csname constants_#1\endcsname% + \else% + \PackageError{constants}{constant{#1} is not defined}{}% + \fi% +} + % If solve_stats/activity/A.pdf exists, define the \activitychart command \IfFileExists{solve_stats/activity/A.pdf}{ \newcommand{\activitychart}{ diff --git a/readme.md b/readme.md index 4d6fb5780..a2bc083be 100644 --- a/readme.md +++ b/readme.md @@ -182,7 +182,7 @@ them to a separate directory. - `bt pdf [-v]` -Use this command to compile the `problem.en.pdf` from the `problem_statement/problem.en.tex` LaTeX statement. +Use this command to compile the `problem.en.pdf` from the `statement/problem.en.tex` LaTeX statement. `problem.en.pdf` is written to the problem directory itself. This can also be used to create the contest pdf by running it from the contest directory. 
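The `\constant{...}` macro added to the LaTeX classes above looks up a command named `constants_<name>`, which the tooling is expected to define in the `constants.tex` file that the templates `\input` from the build directory. A hypothetical sketch of generating such a file; the file name comes from the templates above, but the `\expandafter\def\csname` definition style is an assumption, not the tool's verified output.

```python
from pathlib import Path


# Hypothetical sketch: write a constants.tex defining \constants_<name>
# macros so that \constant{<name>} in a statement expands to the value.
def write_constants_tex(constants: dict[str, str], path: Path) -> None:
    lines = [
        rf"\expandafter\def\csname constants_{name}\endcsname{{{value}}}"
        for name, value in constants.items()
    ]
    path.write_text("\n".join(lines) + "\n")


write_constants_tex({"maxn": "100\\,000"}, Path("constants.tex"))
# In problem.en.tex one could then write: $1 \le n \le \constant{maxn}$
```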
diff --git a/skel/contest/contest.yaml b/skel/contest/contest.yaml index 76e26682b..8dc863a13 100644 --- a/skel/contest/contest.yaml +++ b/skel/contest/contest.yaml @@ -15,8 +15,8 @@ title: {%title%} subtitle: {%subtitle%} year: {%year%} author: {%author%} -testsession: {%testsession%} -print_time_limit: true +test_session: {%test_session%} +print_time_limit: True # problem.yaml defaults source_url: {%source_url%} diff --git a/skel/gitlab_ci/problem.yaml b/skel/gitlab_ci/problem.yaml index 5bc7cd19d..c58ddaab3 100644 --- a/skel/gitlab_ci/problem.yaml +++ b/skel/gitlab_ci/problem.yaml @@ -3,7 +3,9 @@ verify_{%problem%}: - ./bt all --cp --error --no-bar --force --jobs 0 --problem {%problem_path%} only: changes: - #- {%problem_path%}/problem_statement/**/* + #- {%problem_path%}/statement/**/* + #- {%problem_path%}/solution/**/* + #- {%problem_path%}/problem_slide/**/* - {%problem_path%}/problem.yaml - {%problem_path%}/.timelimit - {%problem_path%}/data/**/* diff --git a/skel/problem/generators/example.py b/skel/problem/generators/example_generator.py similarity index 100% rename from skel/problem/generators/example.py rename to skel/problem/generators/example_generator.py diff --git a/skel/problem/generators/generators.yaml b/skel/problem/generators/generators.yaml index 2c3a3fe72..1e3eafae4 100644 --- a/skel/problem/generators/generators.yaml +++ b/skel/problem/generators/generators.yaml @@ -1,8 +1,7 @@ #solution: /submissions/accepted/submission.py -#visualizer: /visualizers/asy.sh -version: 2025-02 # use this version of the generators framework +version: 2025-08 # use this version of the generators framework -{%testdata_yaml_comment%}testdata.yaml: +{%test_group_yaml_comment%}test_group.yaml: # One or more of: # case_sensitive # space_change_sensitive diff --git a/skel/problem/input_visualizer/example_input_visualizer.py b/skel/problem/input_visualizer/example_input_visualizer.py new file mode 100644 index 000000000..df1162d22 --- /dev/null +++ b/skel/problem/input_visualizer/example_input_visualizer.py @@ -0,0 +1,9 @@ +#!/usr/bin/env python3 +import sys + +input_file = open(sys.argv[1]).read().strip() +answer_file = open(sys.argv[2]).read().strip() +args = sys.argv[3:] +with open("testcase.svg", "w") as f: + # this is unsafe since args could contain svg tags + print(f"args: {args}", file=f) diff --git a/skel/problem/input_visualizer/readme.md b/skel/problem/input_visualizer/readme.md new file mode 100644 index 000000000..0ef426286 --- /dev/null +++ b/skel/problem/input_visualizer/readme.md @@ -0,0 +1,2 @@ +This input visualizer is intended for use with BAPCtools' `bt generate`. +The visualizer should be invoked as `./visualizer <...input_visualizer_args>` and should write a `testcase.` file. 
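Beyond the trivial skeleton example above, an input visualizer typically parses the test case and draws it. Below is a hedged sketch following the same interface (input file, then answer file, then any `input_visualizer_args`, writing `testcase.svg` to the working directory); the assumed input format, a count followed by coordinate pairs, is purely hypothetical.

```python
#!/usr/bin/env python3
# Sketch of an input visualizer: argv[1] is the input file, argv[2] the
# answer file, remaining arguments come from input_visualizer_args.
# Writes testcase.svg to the current working directory.
import sys

with open(sys.argv[1]) as f:
    n = int(f.readline())
    points = [tuple(map(float, f.readline().split())) for _ in range(n)]

circles = "\n".join(
    f'  <circle cx="{x}" cy="{y}" r="2" fill="black"/>' for x, y in points
)
with open("testcase.svg", "w") as f:
    f.write(
        '<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100">\n'
        f"{circles}\n</svg>\n"
    )
```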
diff --git a/skel/problem/output_validators/output_validator/output_validator.cpp b/skel/problem/output_validator/output_validator.cpp similarity index 100% rename from skel/problem/output_validators/output_validator/output_validator.cpp rename to skel/problem/output_validator/output_validator.cpp diff --git a/skel/problem/output_validator/validation.h b/skel/problem/output_validator/validation.h new file mode 120000 index 000000000..8394e5a44 --- /dev/null +++ b/skel/problem/output_validator/validation.h @@ -0,0 +1 @@ +../../../headers/validation.h \ No newline at end of file diff --git a/skel/problem/output_visualizer/example_output_visualizer.py b/skel/problem/output_visualizer/example_output_visualizer.py new file mode 100644 index 000000000..0d27bcb5e --- /dev/null +++ b/skel/problem/output_visualizer/example_output_visualizer.py @@ -0,0 +1,10 @@ +#!/usr/bin/env python3 +import sys + +input_file = open(sys.argv[1]).read().strip() +answer_file = open(sys.argv[2]).read().strip() +# input yields the team output +args = sys.argv[4:] +with open(f"{sys.argv[3]}/judgeimage.svg", "w") as f: + # this is unsafe since args could contain svg tags + print(f"args: {args}", file=f) diff --git a/skel/problem/problem.yaml b/skel/problem/problem.yaml index 37874dcfc..7b1f2ca75 100644 --- a/skel/problem/problem.yaml +++ b/skel/problem/problem.yaml @@ -7,8 +7,7 @@ name: {%problemname%} uuid: {%uuid%} credits: {%author%} -{%source%} -license: {%license%} +{%source%}license: {%license%} {%rights_owner%} # limits: # time_limit: 1.0 diff --git a/skel/problem/problem_statement/problem-slide.en.tex b/skel/problem/problem_slide/problem-slide.en.tex similarity index 100% rename from skel/problem/problem_statement/problem-slide.en.tex rename to skel/problem/problem_slide/problem-slide.en.tex diff --git a/skel/problem/problem_statement/solution.en.tex b/skel/problem/solution/solution.en.tex similarity index 100% rename from skel/problem/problem_statement/solution.en.tex rename to skel/problem/solution/solution.en.tex diff --git a/skel/problem/problem_statement/problem.en.tex b/skel/problem/statement/problem.en.tex similarity index 100% rename from skel/problem/problem_statement/problem.en.tex rename to skel/problem/statement/problem.en.tex diff --git a/skel/problem_cfp/problem_statement/solution.en.tex b/skel/problem_cfp/solution/solution.en.tex similarity index 100% rename from skel/problem_cfp/problem_statement/solution.en.tex rename to skel/problem_cfp/solution/solution.en.tex diff --git a/skel/problem_cfp/problem_statement/problem.en.tex b/skel/problem_cfp/statement/problem.en.tex similarity index 100% rename from skel/problem_cfp/problem_statement/problem.en.tex rename to skel/problem_cfp/statement/problem.en.tex diff --git a/support/schemas/generators.cue b/support/schemas/generators.cue index 98d3821b3..bd94e0da2 100644 --- a/support/schemas/generators.cue +++ b/support/schemas/generators.cue @@ -20,57 +20,60 @@ import "strings" _parts: [#path, ...#command_args] } -// Test cases and test groups allow configuration of solution, visualiser, and random salt. +// Test cases and test groups allow configuration of solution, and random salt. 
#config: { // Path to solution starts with slash, such as "/submissions/accepted/foo.py" solution?: #filepath & =~"^/" - // Visualiser can be omitted to disable visualisation, may not use {count} - visualizer?: #command & =~"^/" & !~"\\{count" | null random_salt?: string } -#testgroup_config: { +#test_group_config: { #config - "testdata.yaml": #testdata_settings + "test_group.yaml": #test_group_settings } -#testcase: +#test_case: #command & !~"^/" | { generate?: #command & !~"^/" count?: int & >=1 & <=100 - // The "copy" key uses a path relative to "/generators/" ending in a testcase name, + // The "copy" key uses a path relative to "/generators/" ending in a test case name, // such as "manual/samples/3". - copy?: #dirpath - ["in" | "ans" | "out" | "desc" | "hint"]: string - interaction?: =~"^([<>][^\\n]*\\n)+$" + copy?: #dirpath + + ["in" | "in.statement" | "in.download" | + "ans" | "ans.statement" | "ans.download" | + "out"]: string + interaction?: =~"^([<>][^\\n]*\\n)+$" + yaml?: #test_case_config + #config } -#data_dict: {[#name]: #testgroup | #testcase} -#data_list: {[#name | ""]: #testgroup | #testcase} & struct.MinFields(1) & struct.MaxFields(1) +#data_dict: {[#name]: #test_group | #test_case} +#data_list: {[#name | ""]: #test_group | #test_case} & struct.MinFields(1) & struct.MaxFields(1) -#testgroup: { +#test_group: { data?: #data_dict | [...#data_list] include?: [...#dirpath] - #testgroup_config + #test_group_config } #Generators: { - // Generators are named like files or testcases, like "tree.py" or "a". + // Generators are named like files or test cases, like "tree.py" or "a". // Each consists of a nonempty list of paths relative to "/generators/", // such as ["tree_generator/tree.py", "lib.py"]. generators?: [#name]: [...(#path & !~"^/")] & [_, ...] data: close({ - sample!: #testgroup - secret!: #testgroup - invalid_input?: #testgroup - invalid_answer?: #testgroup - invalid_output?: #testgroup - valid_output?: #testgroup + sample!: #test_group + secret!: #test_group + invalid_input?: #test_group + invalid_answer?: #test_group + invalid_output?: #test_group + valid_output?: #test_group }) - #testgroup_config - version: =~"^[0-9]{4}-[0-9]{2}$" | *"2025-02" + #test_group_config + version: =~"^[0-9]{4}-[0-9]{2}$" | *"2025-08" ... // Do allow unknown_key at top level for tooling } diff --git a/support/schemas/generators_yaml_schema.json b/support/schemas/generators_yaml_schema.json index a784a6210..7a29f5cab 100644 --- a/support/schemas/generators_yaml_schema.json +++ b/support/schemas/generators_yaml_schema.json @@ -2,7 +2,7 @@ "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://json.schemastore.org/problem_package_generators.json", "$defs": { - "testgroup": { + "test_group": { "type": "object", "title": "Test Group", "description": "A test group", @@ -37,39 +37,27 @@ "type": "string" } }, - "testdata.yaml": { - "$ref": "#/$defs/testdata_settings" + "test_group.yaml": { + "$ref": "#/$defs/test_group_settings" }, "solution": { "$ref": "#/$defs/solution" - }, - "visualizer": { - "$ref": "#/$defs/visualizer" } }, "additionalProperties": false }, - "testdata_settings": { + "test_case_or_group_settings": { "type": "object", - "title": "Test data settings", - "description": "The settings that apply to the test data for this test group. 
Will be copied to this test group's `testdata.yaml`.", "properties": { - "on_reject": { - "enum": ["break", "continue"], - "default": "break" - }, - "grading": { - "enum": ["default", "custom"] - }, - "grader_flags": { - "type": "string", - "examples": ["min", "sum"] + "args": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Defines arguments passed to the submission for the test case/group." }, "input_validator_args": { "oneOf": [ - { - "type": "string" - }, { "type": "array", "items": { @@ -80,7 +68,10 @@ "type": "object", "patternProperties": { "^([A-Za-z0-9][A-Za-z0-9_-]*[A-Za-z0-9]|[A-Za-z0-9])$":{ - "type": "string" + "type": "array", + "items": { + "type": "string" + } } } } @@ -88,19 +79,26 @@ "description": "Defines arguments passed to each input validator for the test case/group. If a sequence of strings, then those are the arguments that will be passed to each input validator for this the case/group. If a map, then each key is the name of the input validator and the value is the arguments to pass to that input validator for the test case/group. Validators not present in the map are run without any arguments." }, "output_validator_args": { - "oneOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ], + "type": "array", + "items": { + "type": "string" + }, "description": "Defines arguments passed to the output validator for the test case/group." }, + "input_visualizer_args": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Defines arguments passed to the input visualizer for the test case/group." + }, + "output_visualizer_args": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Defines arguments passed to the output visualizer for the test case/group." + }, "input_validator_flags": { "type": "string", "deprecated": true, @@ -109,19 +107,102 @@ "output_validator_flags": { "type": "string", "deprecated": true, - "description": "With 'problem_format_version: 2023-07-draft' in problem.yaml, use input_validator_args instead." - }, - "accept_score": { - "type": "string" - }, - "reject_score": { - "type": "string" - }, - "range": { - "type": "string" + "description": "With 'problem_format_version: 2023-07-draft' in problem.yaml, use output_validator_args instead." } } }, + "test_case_settings": { + "title": "Test case settings", + "description": "The settings that apply to this test case.", + "allOf": [ + { + "$ref": "#/$defs/test_case_or_group_settings" + }, + { + "type": "object", + "properties": { + "hint": { + "type": "string", + "description": "A hint provides feedback for solving a test case to, e.g., somebody whose submission didn't pass." + }, + "description": { + "type": "string", + "description": "A description conveys the purpose of a test case. It is an explanation of what aspect or edge case of the solution the input file is meant to test." + } + } + } + ] + }, + "test_group_settings": { + "title": "Test group settings", + "description": "The settings that apply to the test data for this test group. Will be copied to this test group's `test_group.yaml`.", + "allOf": [ + {"$ref": "#/$defs/test_case_or_group_settings"}, + { + "type": "object", + "properties": { + "scoring": { + "type": "object", + "title": "Scoring settings", + "description": "For scoring problems, submissions are given a non-negative score instead of a verdict. The goal of each submission is to maximize this score. 
The scoring behavior is configured for `secret` and each test data group using the `scoring` object.", + "properties": { + "score": { + "oneOf": [ + { + "type": "integer", + "minimum": 1 + }, + { + "enum": ["unbounded"] + } + ] + }, + "aggregation": { + "enum": ["pass-fail", "sum", "min"] + }, + "require_pass": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + } + } + }, + "static_validation": { + "oneOf": [ + { + "type": "boolean" + }, + { + "type": "object", + "properties": { + "args": { + "type": "string", + "description": "Represents the additional arguments passed to the static validator in this group's static validation test case." + }, + "score": { + "type": "integer", + "description": "The maximum score of the static validation test case." + } + } + } + ] + }, + "full_feedback": { + "description": "Defaults to `false` in `secret` and `true` in `sample`.\nWhen `full_feedback` is `true`, somebody whose submission didn't pass case should be shown:\n- the given input,\n- the produced output (stdout),\n- any error messages (stderr),\n- the illustration created by the output visualizer (if applicable),\n- the expected output.", + "type": "boolean" + } + } + } + ] + }, "data_dict": { "title": "Data Dictionary", "description": "Defines the contents of a test group", @@ -130,10 +211,10 @@ "^([A-Za-z0-9][A-Za-z0-9_-]*[A-Za-z0-9]|[A-Za-z0-9]|)$": { "oneOf": [ { - "$ref": "#/$defs/testgroup" + "$ref": "#/$defs/test_group" }, { - "$ref": "#/$defs/testcase" + "$ref": "#/$defs/test_case" } ] } @@ -141,7 +222,7 @@ "additionalProperties": false, "minProperties": 1 }, - "testcase": { + "test_case": { "title": "Test Case", "description": "A test case, i.e., a single instance to the problem.", "oneOf": [ @@ -174,25 +255,35 @@ "title": "Input", "description": "Explicit input given as a string" }, + "in.statement": { + "type": "string", + "title": "Input (statement)", + "description": "Explicit input given as a string, only shown in problem statement (defaults to 'in')" + }, + "in.download": { + "type": "string", + "title": "Input (download)", + "description": "Explicit input given as a string, only shown as sample download in the contest system (defaults to 'in.statement' or 'in')" + }, "ans": { "type": "string", "title": "Default Answer", "description": "Explicit default answer given as a string" }, - "out": { + "ans.statement": { "type": "string", - "title": "Invalid output", - "description": "Explicit (in)valid output given as a string; can only be given in (in)valid_output" + "title": "Default Answer (statement)", + "description": "Explicit default answer given as a strans, only shown in problem statement (defaults to 'in')" }, - "desc": { + "ans.download": { "type": "string", - "title": "Description", - "description": "Privileged information explaining the purpose of this test case given as a string" + "title": "Default Answer (download)", + "description": "Explicit default answer given as a string, only shown as sample download in the contest system (defaults to 'ans.statement' or 'ans')" }, - "hint": { + "out": { "type": "string", - "title": "Hint", - "description": "Feedback shown to the solver about this test case given as a string" + "title": "Output", + "description": "Explicit (in)valid output given as a string; can only be given in sample or (in)valid_output" }, "interaction": { "title": "Sample interaction", @@ -201,8 +292,8 @@ "type": "string", "pattern": "^([<>][^\\n]*\\n)+$" }, - "visualizer": { - "$ref": 
"#/$defs/visualizer" + "yaml": { + "$ref": "#/$defs/test_case_settings" }, "random_salt": { "$ref": "#/$defs/random_salt" @@ -215,24 +306,6 @@ } ] }, - "visualizer": { - "title": "Visualizer", - "description": "Absolute path to and arguments for a visualizer. Leave empty to disable visualizion.", - "examples": [ - "/visualizer", - "/visualizers/asy.py", - "/visualizers/vis --large" - ], - "oneOf": [ - { - "type": "string", - "pattern": "^\/([^{}]|\\{name\\})*(\\{seed(:[0-9]+)?\\})?([^{}]|\\{name\\})*$" - }, - { - "type": "null" - } - ] - }, "random_salt": { "title": "Random Salt", "type": "string", @@ -277,14 +350,11 @@ } }, "additionalProperties": true, - "description": "Generate test data for this problem. Version 2025-02.", + "description": "Generate test data for this problem. Version 2025-08.", "properties": { "solution": { "$ref": "#/$defs/solution" }, - "visualizer": { - "$ref": "#/$defs/visualizer" - }, "random_salt": { "$ref": "#/$defs/random_salt" }, @@ -303,43 +373,43 @@ }, "additionalProperties": false }, + "test_group.yaml": { + "$ref": "#/$defs/test_group_settings" + }, "data": { "title": "Test data root", "description": "The root test group. Must contain the test groups 'sample' and 'secret'.", "type": "object", "properties": { "sample": { - "$ref": "#/$defs/testgroup", + "$ref": "#/$defs/test_group", "title": "Sample inputs", "description": "Test cases shown to the solver in the problem statement" }, "secret": { - "$ref": "#/$defs/testgroup", + "$ref": "#/$defs/test_group", "title": "Secret inputs", "description": "The test cases against which submissions are validated" }, "invalid_input": { - "$ref": "#/$defs/testgroup", + "$ref": "#/$defs/test_group", "title": "Invalid inputs", "description": "Test cases whose input files are invalid" }, "invalid_answer": { - "$ref": "#/$defs/testgroup", + "$ref": "#/$defs/test_group", "title": "Invalid answers", "description": "Test cases whose answer files are invalid. Inputs must be valid." }, "invalid_output": { - "$ref": "#/$defs/testgroup", + "$ref": "#/$defs/test_group", "title": "Invalid outputs", "description": "Valid test cases for which the `.out` string is rejected by output validation." }, "valid_output": { - "$ref": "#/$defs/testgroup", + "$ref": "#/$defs/test_group", "title": "Valid outputs", "description": "Valid test cases for which the `.out` must pass output validation." - }, - "testdata.yaml": { - "$ref": "#/$defs/testdata_settings" } }, "additionalProperties": false, diff --git a/support/schemas/problemformat.cue b/support/schemas/problemformat.cue index e063dd16a..0291b5a4a 100644 --- a/support/schemas/problemformat.cue +++ b/support/schemas/problemformat.cue @@ -1,6 +1,6 @@ package problemformat -// Directory names, as well as names of testcases and generators are +// Directory names, as well as names of test cases and generators are // alphanumerical with internal underscores and hyphens; such as // "huge", "make_tree", "3", "a", or "connected_graph-01"; // but not "huge_" or "-2" or "bapc.24" or ".." or "". 
@@ -16,19 +16,35 @@ let filename = "[A-Za-z0-9][A-Za-z0-9_.-]{0,253}[A-Za-z0-9]" #filepath: =~"^/?(\(dirname)/)*\(filename)$" -// Paths can both refer to objects like the testgroup "data/secret/huge" or +// Paths can both refer to objects like the test group "data/secret/huge" or // a program file like "/submissions/accepted/x.cpp" - #path: #dirpath | #filepath + // Test data settings +#test_case_or_group_settings: { + args?: *[] | [string] + input_validator_args?: *[] | [string] | {[string]: [string]} + output_validator_args?: *[] | [string] + input_visualizer_args?: *[] | [string] + output_visualizer_args?: *[] | [string] + full_feedback?: bool +} -#testdata_settings: { - input_validator_args?: *"" | string | {[string]: string} - output_validator_args?: *"" | string - grading?: { - score?: >0 - max_score?: >0 +#test_case_settings: { + #test_case_or_group_settings + hint?: string + description?: string +} + +#test_group_settings: { + scoring?: { + score?: >0 | "unbounded" aggregation?: "sum" | "min" - // run_samples?: bool + require_pass: string | [string] + } + #test_case_or_group_settings + static_validation?: *false | true | { + args?: string + score?: int } } diff --git a/test/problems/boolfind/domjudge-problem.ini b/test/problems/boolfind/domjudge-problem.ini deleted file mode 100644 index e90ee3538..000000000 --- a/test/problems/boolfind/domjudge-problem.ini +++ /dev/null @@ -1,5 +0,0 @@ -probid='A' -allow_submit='1' -allow_judge='1' -timelimit='1' -color='#FFFFFF' diff --git a/test/problems/boolfind/output_validators/boolfind_run/build b/test/problems/boolfind/output_validator/build similarity index 100% rename from test/problems/boolfind/output_validators/boolfind_run/build rename to test/problems/boolfind/output_validator/build diff --git a/test/problems/boolfind/output_validators/boolfind_run/run b/test/problems/boolfind/output_validator/run similarity index 100% rename from test/problems/boolfind/output_validators/boolfind_run/run rename to test/problems/boolfind/output_validator/run diff --git a/test/problems/boolfind/output_validators/boolfind_run/runjury_boolfind.c b/test/problems/boolfind/output_validator/runjury_boolfind.c similarity index 100% rename from test/problems/boolfind/output_validators/boolfind_run/runjury_boolfind.c rename to test/problems/boolfind/output_validator/runjury_boolfind.c diff --git a/test/problems/boolfind/problem.yaml b/test/problems/boolfind/problem.yaml index 8f103f71c..400e10cd2 100644 --- a/test/problems/boolfind/problem.yaml +++ b/test/problems/boolfind/problem.yaml @@ -1,26 +1,11 @@ +problem_format_version: 2023-07-draft +type: interactive name: boolfind -author: DOMjudge -# BAPC 2020 -source: -# 2020.bapc.eu -source_url: +credits: + authors: DOMjudge uuid: 8f7ed1ba-43f5-424e-9af4-8a5f2e428ce3 license: unknown rights_owner: -# 'default', 'custom', or 'interactive' -validation: custom interactive -# One or more of: -# case_sensitive -# space_change_sensitive -# float_absolute_tolerance eps -# float_relative_tolerance eps -# float_tolerance eps -#validator_flags: - -# To change the time limit factors for Kattis, use: -# limits: -# Time limit is 2*slowest accepted submission: -# time_multiplier: 2 -# Warning for submissions within 1 second of limit: -# time_safety_margin: 1 +limits: + time_limit: 1.0 diff --git a/test/problems/boolfind/problem_statement/problem.en.tex b/test/problems/boolfind/statement/problem.en.tex similarity index 100% rename from test/problems/boolfind/problem_statement/problem.en.tex rename to 
test/problems/boolfind/statement/problem.en.tex diff --git a/test/problems/constants/.gitignore b/test/problems/constants/.gitignore new file mode 100644 index 000000000..8a1fac419 --- /dev/null +++ b/test/problems/constants/.gitignore @@ -0,0 +1,3 @@ +#GENERATED BY BAPCtools +data/* +!data/sample/ diff --git a/test/problems/constants/data/sample/1.ans b/test/problems/constants/data/sample/1.ans new file mode 100644 index 000000000..7ed6ff82d --- /dev/null +++ b/test/problems/constants/data/sample/1.ans @@ -0,0 +1 @@ +5 diff --git a/test/problems/constants/data/sample/1.in b/test/problems/constants/data/sample/1.in new file mode 100644 index 000000000..7ed6ff82d --- /dev/null +++ b/test/problems/constants/data/sample/1.in @@ -0,0 +1 @@ +5 diff --git a/test/problems/constants/generators/example.py b/test/problems/constants/generators/example.py new file mode 100644 index 000000000..392657933 --- /dev/null +++ b/test/problems/constants/generators/example.py @@ -0,0 +1,6 @@ +#!/usr/bin/python3 +import sys + +values = sys.argv[1:] + ["{{INT_FIVE}}", "{{STRING_FIVE}}", "5"] +assert len(set(values)) == 1 +print(values[0]) diff --git a/test/problems/constants/generators/generators.yaml b/test/problems/constants/generators/generators.yaml new file mode 100644 index 000000000..bafb42925 --- /dev/null +++ b/test/problems/constants/generators/generators.yaml @@ -0,0 +1,27 @@ +solution: /submissions/accepted/submission.py + +data: + sample: + data: + - '': example.py {{INT_FIVE}} {{STRING_FIVE}} 5 # substituted + secret: + include: + - sample + + invalid_input: + data: + dont_substitute: + in: "{{INT_FIVE}}" # not substituted + + invalid_answer: + data: + dont_substitute: + in: "5" + ans: "{{INT_FIVE}}" # not substituted + + invalid_output: + data: + dont_substitute: + in: "5" + ans: "5" + out: "{{INT_FIVE}}" # not substituted diff --git a/test/problems/constants/input_validators/input_validator/input_validator.cpp b/test/problems/constants/input_validators/input_validator/input_validator.cpp new file mode 100644 index 000000000..bd2af7b5c --- /dev/null +++ b/test/problems/constants/input_validators/input_validator/input_validator.cpp @@ -0,0 +1,7 @@ +#include "validation.h" + +int main(int argc, char** argv) { + InputValidator v(argc, argv); + int n = v.read_integer("n", {{INT_FIVE}}, {{STRING_FIVE}}); + v.newline(); +} diff --git a/test/problems/constants/input_validators/input_validator/validation.h b/test/problems/constants/input_validators/input_validator/validation.h new file mode 120000 index 000000000..2b74c5d6a --- /dev/null +++ b/test/problems/constants/input_validators/input_validator/validation.h @@ -0,0 +1 @@ +../../../../../headers/validation.h \ No newline at end of file diff --git a/test/problems/constants/input_validators/validate.ctd b/test/problems/constants/input_validators/validate.ctd new file mode 100644 index 000000000..ec68f8dec --- /dev/null +++ b/test/problems/constants/input_validators/validate.ctd @@ -0,0 +1,2 @@ +INT({{INT_FIVE}}, {{STRING_FIVE}}) NEWLINE +EOF diff --git a/test/problems/constants/output_validator/output_validator.cpp b/test/problems/constants/output_validator/output_validator.cpp new file mode 100644 index 000000000..3b8d4d53a --- /dev/null +++ b/test/problems/constants/output_validator/output_validator.cpp @@ -0,0 +1,12 @@ +#include "validation.h" + +int main(int argc, char *argv[]) { + // Set up the input and answer streams. 
+ std::ifstream in(argv[1]); + OutputValidator v(argc, argv); + + int input; + in >> input; + int answer = v.read_integer("answer", {{INT_FIVE}}, {{STRING_FIVE}}); + v.newline(); +} diff --git a/skel/problem/output_validators/output_validator/validation.h b/test/problems/constants/output_validator/validation.h similarity index 100% rename from skel/problem/output_validators/output_validator/validation.h rename to test/problems/constants/output_validator/validation.h diff --git a/test/problems/constants/problem.yaml b/test/problems/constants/problem.yaml new file mode 100644 index 000000000..5b1678d6a --- /dev/null +++ b/test/problems/constants/problem.yaml @@ -0,0 +1,18 @@ +# Specification: https://icpc.io/problem-package-format/spec/2023-07-draft.html +problem_format_version: 2023-07-draft +# 'pass-fail', 'interactive', 'multi-pass', or 'interactive multi-pass' +type: pass-fail +name: + #lang: name + en: constants +uuid: 8ee7605a-26db-897d-15c8-b72d4e1bfcbb +credits: BAPCtools +license: cc by-sa +rights_owner: author + +# limits: +# time_limit: 1.0 + +constants: + INT_FIVE: 5 + STRING_FIVE: "5" diff --git a/test/problems/constants/problem_slide/problem-slide.en.tex b/test/problems/constants/problem_slide/problem-slide.en.tex new file mode 100644 index 000000000..a5bc2a282 --- /dev/null +++ b/test/problems/constants/problem_slide/problem-slide.en.tex @@ -0,0 +1,7 @@ +\newcommand{\maxn}{1000} + +\begin{frame} + \frametitle{\problemtitle} + + Output a single integer \constant{INT_FIVE}. +\end{frame} diff --git a/test/problems/constants/solution/solution.en.tex b/test/problems/constants/solution/solution.en.tex new file mode 100644 index 000000000..1216d6f3c --- /dev/null +++ b/test/problems/constants/solution/solution.en.tex @@ -0,0 +1,4 @@ +\begin{frame} + \frametitle{\problemtitle} + Output a single integer \constant{INT_FIVE}. +\end{frame} diff --git a/test/problems/constants/statement/problem.en.tex b/test/problems/constants/statement/problem.en.tex new file mode 100644 index 000000000..c80ed5ef9 --- /dev/null +++ b/test/problems/constants/statement/problem.en.tex @@ -0,0 +1,14 @@ +\problemname{} + +Output a single integer \constant{STRING_FIVE}. + +\begin{Input} + The input consists of: + \begin{itemize} + \item One line with a single integer \constant{INT_FIVE}. + \end{itemize} +\end{Input} + +\begin{Output} + Output a single integer \constant{INT_FIVE}. 
+\end{Output} diff --git a/test/problems/constants/submissions/accepted/submission.py b/test/problems/constants/submissions/accepted/submission.py new file mode 100644 index 000000000..8f3072e43 --- /dev/null +++ b/test/problems/constants/submissions/accepted/submission.py @@ -0,0 +1,2 @@ +#!/usr/bin/python3 +print(5) diff --git a/test/problems/constants/submissions/wrong_answer/constant_in_submission.py b/test/problems/constants/submissions/wrong_answer/constant_in_submission.py new file mode 100644 index 000000000..e6d6ed614 --- /dev/null +++ b/test/problems/constants/submissions/wrong_answer/constant_in_submission.py @@ -0,0 +1,2 @@ +#!/usr/bin/python3 +print("{{STRING_FIVE}}") diff --git a/test/problems/contest.yaml b/test/problems/contest.yaml index 4911eef4d..d559013ac 100644 --- a/test/problems/contest.yaml +++ b/test/problems/contest.yaml @@ -8,4 +8,4 @@ title: Problems subtitle: year: 2020 author: Ragnar Groot Koerkamp -testsession: false +test_session: False diff --git a/test/problems/different/output_validators/different_validator/validate.cc b/test/problems/different/output_validator/validate.cc similarity index 100% rename from test/problems/different/output_validators/different_validator/validate.cc rename to test/problems/different/output_validator/validate.cc diff --git a/test/problems/different/output_validators/different_validator/validate.h b/test/problems/different/output_validator/validate.h similarity index 100% rename from test/problems/different/output_validators/different_validator/validate.h rename to test/problems/different/output_validator/validate.h diff --git a/test/problems/different/problem.yaml b/test/problems/different/problem.yaml index 48ed7e265..dcaa03958 100644 --- a/test/problems/different/problem.yaml +++ b/test/problems/different/problem.yaml @@ -1,16 +1,14 @@ # problem.yaml +problem_format_version: 2023-07-draft +type: pass-fail name: en: A Different Problem ## At least one of author, source, or rights_owner must be provided. -## -## Author of the problem (default: null) -# author: ## Where the problem was first used (default: null) source: Kattis -# source_url: # Unique problem uuid uuid: FFFFFFFF-FFFF-FFFF-FFFF-FFFFFFFFFFFF @@ -19,97 +17,11 @@ uuid: FFFFFFFF-FFFF-FFFF-FFFF-FFFFFFFFFFFF ## value of source if no author given). # rights_owner: -## License (see below for list of possible values) license: cc by-sa -## Some keywords describing the problem (default: empty) -# keywords: - -# Indicate that we use a custom output validator instead of the -# default token-based diff. -validation: custom -# validator_flags: float_tolerance 1e-4 - # Override standard limits: say that the TLE solutions provided should # be at least 4 times above the time limit in order for us to be # happy. limits: -# time_multiplier: 5 - time_safety_margin: 4 # (default is 2) -# memory: 1024 # MB -# output: 8 # MB -# compilation_time: 60 # seconds -# validation_time: 60 # seconds -# validation_memory: 1024 # MB -# validation_output: 8 # MB - - -############################################################################ -# POSSIBLE VALUES FOR LICENSE: -# -# "unknown" The default value. In practice means that the -# problem can not be used. -# "public domain" There are no known copyrights on the problem, -# anywhere in the world. 
-# http://creativecommons.org/about/pdm -# "cc0" CC0, "no rights reserved" -# http://creativecommons.org/about/cc0 -# "cc by" CC attribution -# http://creativecommons.org/licenses/by/3.0/ -# "cc by-sa" CC attribution, share alike -# http://creativecommons.org/licenses/by-sa/3.0/ -# "educational" May be freely used for educational purposes -# "permission" Used with permission. The author must be contacted -# for every additional use. -############################################################################ - - -############################################################################ -# OUTPUT VALIDATOR OPTIONS -# -# There is a relatively versatile default validator available that is -# sufficient for most problems. If the problem needs a custom output -# validator, the validation field should be set to "custom". The -# validator_flags field is just a list of command line arguments that -# are passed on to the validator program used (whether it be the -# default validator or a custom validator). -############################################################################ - - -############################################################################ -# DESCRIPTION OF DEFAULT VALIDATOR OPTIONS -# -# The default validator is essentially a beefed-up diff. In its default -# mode, it tokenizes the two files and compares token by token. It -# supports the following command-line arguments to control how tokens -# are compared. -# -# o case_sensitive -# indicates that comparisons should be case-sensitive -# o space_change_sensitive -# indicates that changes in the amount of whitespace should -# be rejected (the de- fault is that any sequence of 1 or more -# whitespace characters are equivalent). -# o float_relative_tolerance eps -# indicates that floating-point tokens should be accepted if -# they are within relative error <= eps -# o float_absolute_tolerance eps -# indicates that floating-point tokens should be accepted if -# they are within absolute error <= eps -# o float_tolerance eps -# short-hand for applying eps as both relative and absolute -# tolerance. -# -# Note that when supplying both a relative and an absolute tolerance, -# the semantics are that a token is accepted if it is within either of -# the two tolerances. -# -# When a floating-point tolerance has been set, any valid formatting -# of floating point numbers is accepted for floating point tokens. So -# for instance if a token in the answer file says 0.0314, a token of -# 3.14000000e-2 in the output file would be accepted (but note that -# this applies *only* to floating point tokens, so "2.0e2" would *not* -# be a correct output if the answer file says "200"). If no floating -# point tolerance has been set, floating point tokens are treated just -# like any other token and has to match exactly. 
-############################################################################ + time_multipliers: + time_limit_to_tle: 4 diff --git a/test/problems/different/problem_statement/problem.en.tex b/test/problems/different/statement/problem.en.tex similarity index 100% rename from test/problems/different/problem_statement/problem.en.tex rename to test/problems/different/statement/problem.en.tex diff --git a/test/problems/divsort/generators/generators.yaml b/test/problems/divsort/generators/generators.yaml index 6775d7ebf..9b3f9e38f 100644 --- a/test/problems/divsort/generators/generators.yaml +++ b/test/problems/divsort/generators/generators.yaml @@ -6,23 +6,22 @@ data: in: 9.0 3.0 ab cd ans: 3.0 abcd secret: - testdata.yaml: + test_group.yaml: input_validator_args: - integers: small + integers: [small] data: integers: - testdata.yaml: - input_validator_args: - --integer + test_group.yaml: + input_validator_args: [--integer] #grading: foo data: - unsorted-integer: in: 10.0 2.0 ba cd ans: 5.0 abcd sorted: - testdata.yaml: + test_group.yaml: input_validator_args: - strings: --sorted + strings: [--sorted] data: - sorted-integer: in: 10.0 1.0 ab cd @@ -30,47 +29,49 @@ data: general: data: nested_1: - testdata.yaml: - input_validator_args: --small + test_group.yaml: + input_validator_args: [--small] data: small_floats: in: 10 3.5 ab cd nested_2: - testdata.yaml: + test_group.yaml: input_validator_args: - integers: "" # hides the input_validator_args in secret/testdata.yaml + integers: [] # hides the input_validator_args in secret/test_group.yaml data: - tiny_floats: in: 10.0 3.5 ab dc - large_integers: in: 102.0 2.0 ab cd - desc: Must validate, because `secret/testdata.yaml` hidden by `secret/general/nested_2/testdata.yaml` + yaml: + description: Must validate, because `secret/general/nested_2/test_group.yaml` shadows `secret/test_group.yaml` tolerant: - testdata.yaml: - output_validator_args: float_tolerance 1e-2 + test_group.yaml: + output_validator_args: [float_tolerance, "1e-2"] data: - tiny_floats: in: 10.0 3.0 ab dc ans: 3.33 abcd - desc: | - Must be AC. Compare invalid_output/imprecise, which is (there) - invalid because of float_tolerance + yaml: + description: | + Must be AC. 
Compare invalid_output/imprecise, which is (there) + invalid because of float_tolerance invalid_input: data: always_invalid: data: - too_many_tokens: { in: 10.0 2.5 ab cd ef } + too_many_tokens: {in: 10.0 2.5 ab cd ef} integers: - testdata.yaml: - input_validator_args: --integer + test_group.yaml: + input_validator_args: [--integer] data: - ints_expected: { in: 10.0 2.5 ab cd } + ints_expected: {in: 10.0 2.5 ab cd} include: - small_floats sorted: - testdata.yaml: - input_validator_args: --sorted + test_group.yaml: + input_validator_args: [--sorted] include: - unsorted # invalid here because of --sorted flag (valid input in invalid_answers/no_output_validator_args) invalid_answer: @@ -84,8 +85,8 @@ data: in: 10.0 2.0 cba cd ans: 5.0 Abccd with_output_validator_args: - testdata.yaml: - output_validator_args: --forbid_abcd + test_group.yaml: + output_validator_args: [--forbid_abcd] include: - imprecise # must reject because its ans includes abcd invalid_output: @@ -95,6 +96,8 @@ data: ans: 3.333333333 abcd out: 3.33 abcd valid_output: + test_group.yaml: + output_validator_args: [float_tolerance, "1e-2"] data: valid: in: 10.0 3.0 ab cd diff --git a/test/problems/divsort/problem.yaml b/test/problems/divsort/problem.yaml index bb6bfedaf..6a0c0092e 100644 --- a/test/problems/divsort/problem.yaml +++ b/test/problems/divsort/problem.yaml @@ -1,3 +1,4 @@ +problem_format_version: 2023-07-draft name: Division and sorting uuid: 8ee7605a-a0ba-8ce8-2a91-a6192b70141f license: unknown diff --git a/test/problems/divsort/problem_statement/problem.en.tex b/test/problems/divsort/statement/problem.en.tex similarity index 100% rename from test/problems/divsort/problem_statement/problem.en.tex rename to test/problems/divsort/statement/problem.en.tex diff --git a/test/problems/fltcmp/data/test_group.yaml b/test/problems/fltcmp/data/test_group.yaml new file mode 100644 index 000000000..ded323389 --- /dev/null +++ b/test/problems/fltcmp/data/test_group.yaml @@ -0,0 +1 @@ +output_validator_args: [float_tolerance, "1E-6"] diff --git a/test/problems/fltcmp/domjudge-problem.ini b/test/problems/fltcmp/domjudge-problem.ini deleted file mode 100644 index e90ee3538..000000000 --- a/test/problems/fltcmp/domjudge-problem.ini +++ /dev/null @@ -1,5 +0,0 @@ -probid='A' -allow_submit='1' -allow_judge='1' -timelimit='1' -color='#FFFFFF' diff --git a/test/problems/fltcmp/problem.yaml b/test/problems/fltcmp/problem.yaml index b474cc94e..416dc982a 100644 --- a/test/problems/fltcmp/problem.yaml +++ b/test/problems/fltcmp/problem.yaml @@ -1,26 +1,11 @@ +problem_format_version: 2023-07-draft +type: pass-fail name: fltcmp -author: DOMjudge -# BAPC 2020 -source: -# 2020.bapc.eu -source_url: +credits: + authors: DOMjudge uuid: 407efad0-da0d-49a4-b925-329e929bc990 license: unknown rights_owner: -# 'default', 'custom', or 'interactive' -validation: default -# One or more of: -# case_sensitive -# space_change_sensitive -# float_absolute_tolerance eps -# float_relative_tolerance eps -# float_tolerance eps -validator_flags: float_tolerance 1E-6 - -# To change the time limit factors for Kattis, use: -# limits: -# Time limit is 2*slowest accepted submission: -# time_multiplier: 2 -# Warning for submissions within 1 second of limit: -# time_safety_margin: 1 +limits: + time_limit: 1.0 diff --git a/test/problems/fltcmp/problem_statement/problem.en.tex b/test/problems/fltcmp/statement/problem.en.tex similarity index 100% rename from test/problems/fltcmp/problem_statement/problem.en.tex rename to test/problems/fltcmp/statement/problem.en.tex diff 
--git a/test/problems/generatorincludes/.timelimit b/test/problems/generatorincludes/.timelimit deleted file mode 100644 index d00491fd7..000000000 --- a/test/problems/generatorincludes/.timelimit +++ /dev/null @@ -1 +0,0 @@ -1 diff --git a/test/problems/generatorincludes/generators/generators.yaml b/test/problems/generatorincludes/generators/generators.yaml index c82e8b97e..3a1a83451 100644 --- a/test/problems/generatorincludes/generators/generators.yaml +++ b/test/problems/generatorincludes/generators/generators.yaml @@ -10,11 +10,11 @@ data: secret: data: - small: - testdata.yaml: - output_validator_args: space_change_sensitive + test_group.yaml: + output_validator_args: [space_change_sensitive] input_validator_args: - connected: --small - strongly-connected: --small + connected: [--small] + strongly-connected: [--small] data: - positive: data: diff --git a/test/problems/generatorincludes/problem.yaml b/test/problems/generatorincludes/problem.yaml index ffbda47d4..a07282843 100644 --- a/test/problems/generatorincludes/problem.yaml +++ b/test/problems/generatorincludes/problem.yaml @@ -1,26 +1,14 @@ +problem_format_version: 2023-07-draft +type: pass-fail name: generatorincludes -author: Thore Husfeldt -# Contest name and year -source: Problems -# contest.region.eu -source_url: +credits: + authors: Thore Husfeldt +source: + name: Problems + url: uuid: 745cc994-4c3d-40cf-97c2-cc2a72af1884 license: cc by-sa rights_owner: author -# 'default', 'custom', or 'custom interactive' -validation: default -# One or more of: -# case_sensitive -# space_change_sensitive -# float_absolute_tolerance eps -# float_relative_tolerance eps -# float_tolerance eps -#validator_flags: - -# To change the time limit factors for problemtools/Kattis, use: -# limits: -# Time limit is 2*slowest accepted submission: (default: 5) -# time_multiplier: 2 -# Warning for submissions within 50% of time limit -# time_safety_margin: 1.5 +limits: + time_limit: 1.0 diff --git a/test/problems/generatorincludes/problem_statement/problem.en.tex b/test/problems/generatorincludes/statement/problem.en.tex similarity index 100% rename from test/problems/generatorincludes/problem_statement/problem.en.tex rename to test/problems/generatorincludes/statement/problem.en.tex diff --git a/test/problems/guess/input_format_validators/validate.py b/test/problems/guess/input_validators/validate.py similarity index 100% rename from test/problems/guess/input_format_validators/validate.py rename to test/problems/guess/input_validators/validate.py diff --git a/test/problems/guess/output_validators/guess_validator/validate.cc b/test/problems/guess/output_validator/validate.cc similarity index 100% rename from test/problems/guess/output_validators/guess_validator/validate.cc rename to test/problems/guess/output_validator/validate.cc diff --git a/test/problems/guess/output_validators/guess_validator/validate.h b/test/problems/guess/output_validator/validate.h similarity index 100% rename from test/problems/guess/output_validators/guess_validator/validate.h rename to test/problems/guess/output_validator/validate.h diff --git a/test/problems/guess/output_visualizer_disabled/guess-visualizer.py b/test/problems/guess/output_visualizer_disabled/guess-visualizer.py new file mode 100644 index 000000000..4f201edf2 --- /dev/null +++ b/test/problems/guess/output_visualizer_disabled/guess-visualizer.py @@ -0,0 +1,32 @@ +import sys +from pathlib import Path + + +with open(sys.argv[1]) as in_file, open(sys.argv[3] / Path("judgemessage.txt"), "r") as msg_file: + mode = 
in_file.read().split()[0] + assert mode in ("random", "fixed", "adaptive"), mode + judgemessages = iter(msg_file) + + print(r"""\documentclass[varwidth]{standalone} +\usepackage{tikz} +\usetikzlibrary{patterns} +\tikzset{every node/.style={font=\sffamily}} +\begin{document} +\begin{tikzpicture} + """) + if not mode == "adaptive": + secret = int(next(judgemessages).split()[-1]) + print(rf"\node at ({secret / 100},-1.5) {{ {secret} ({mode}) }};") + else: + next(judgemessages) + print(r"\node at (5,-.5) { adaptive };") + for line in judgemessages: + rnd, guess = int(line.split()[1]), int(line.split()[3]) + y = -1 - rnd + print(rf"\draw [very thick, blue!20] (0, {y}) -- (10, {y});") + print(rf"\node at ({guess / 100}, {y})[anchor=north]", r"{$\uparrow$};") + print(rf"\node at ({guess / 100}, {y - 0.5})[anchor=north] {{ {guess} }};") + if not mode == "adaptive": + print(rf"\draw [red] ({secret / 100}, {-rnd - 1}) -- ({secret / 100}, 0);") + + print(r"\end{tikzpicture}\end{document}") diff --git a/test/problems/guess/output_visualizer_disabled/run b/test/problems/guess/output_visualizer_disabled/run new file mode 100755 index 000000000..62229c730 --- /dev/null +++ b/test/problems/guess/output_visualizer_disabled/run @@ -0,0 +1,41 @@ +#!/bin/bash + +# Set script directory +SCRIPT_DIR="$(dirname "$0")" + +# Check if visualize.py exists +if [[ ! -f "$SCRIPT_DIR/guess-visualizer.py" ]]; then + echo "Error: guess-visualizer.py not found in $SCRIPT_DIR" >&2 + exit 1 +fi + +tmptexdir=$(mktemp -d) # Create a unique temporary directory +OUTPUT_FILE="$tmptexdir/judgeimage.tex" + +# Run visualize.py +python3 "$SCRIPT_DIR/guess-visualizer.py" $1 $2 $3 > "$OUTPUT_FILE" +if [[ $? -ne 0 ]]; then + echo "Error: guess-visualizer.py failed" >&2 + exit 1 +fi + +# Check if judgeimage.tex exists +if [[ ! -f "$OUTPUT_FILE" ]]; then + echo "Error: texfile not found in $SCRIPT_DIR" >&2 + exit 1 +fi + +# Run pdflatex +( + cd "$tmptexdir" && pdflatex judgeimage.tex +) +if [[ $? -ne 0 ]]; then + echo "Error: pdflatex failed" >&2 + exit 1 +fi + +mv "$tmptexdir/judgeimage.pdf" $3 +rm -r "$tmptexdir" + +echo "Script completed successfully." +exit 0 diff --git a/test/problems/guess/problem.yaml b/test/problems/guess/problem.yaml index fad6f7ef0..a176263f2 100644 --- a/test/problems/guess/problem.yaml +++ b/test/problems/guess/problem.yaml @@ -1,13 +1,14 @@ +problem_format_version: 2023-07-draft +type: interactive name: en: Guess the Number source: Kattis uuid: 4c1ca09b-af36-4cb6-82ec-2cd029c02a6a license: cc by-sa -validation: custom interactive - # Override standard limits: say that the TLE solutions provided should # be at least 4 times above the time limit in order for us to be # happy. 
limits: - time_safety_margin: 4 + time_multipliers: + time_limit_to_tle: 4 diff --git a/test/problems/guess/problem_statement/problem.en.tex b/test/problems/guess/statement/problem.en.tex similarity index 100% rename from test/problems/guess/problem_statement/problem.en.tex rename to test/problems/guess/statement/problem.en.tex diff --git a/test/problems/guessnoeofcheck/input_format_validators/validate.py b/test/problems/guessnoeofcheck/input_validators/validate.py similarity index 100% rename from test/problems/guessnoeofcheck/input_format_validators/validate.py rename to test/problems/guessnoeofcheck/input_validators/validate.py diff --git a/test/problems/guessnoeofcheck/output_validators/guess_validator/validate.cc b/test/problems/guessnoeofcheck/output_validator/validate.cc similarity index 100% rename from test/problems/guessnoeofcheck/output_validators/guess_validator/validate.cc rename to test/problems/guessnoeofcheck/output_validator/validate.cc diff --git a/test/problems/guessnoeofcheck/output_validators/guess_validator/validate.h b/test/problems/guessnoeofcheck/output_validator/validate.h similarity index 100% rename from test/problems/guessnoeofcheck/output_validators/guess_validator/validate.h rename to test/problems/guessnoeofcheck/output_validator/validate.h diff --git a/test/problems/guessnoeofcheck/problem.yaml b/test/problems/guessnoeofcheck/problem.yaml index cf3967e9e..4c20d8142 100644 --- a/test/problems/guessnoeofcheck/problem.yaml +++ b/test/problems/guessnoeofcheck/problem.yaml @@ -1,10 +1,12 @@ +type: interactive +problem_format_version: 2023-07-draft source: Kattis uuid: 1bf54011-8f0f-44fb-8030-15d9c1583979 license: cc by-sa -validation: custom interactive # Override standard limits: say that the TLE solutions provided should # be at least 4 times above the time limit in order for us to be # happy. 
limits: - time_safety_margin: 4 + time_multipliers: + time_limit_to_tle: 4 diff --git a/test/problems/guessnoeofcheck/problem_statement/problem.en.tex b/test/problems/guessnoeofcheck/statement/problem.en.tex similarity index 100% rename from test/problems/guessnoeofcheck/problem_statement/problem.en.tex rename to test/problems/guessnoeofcheck/statement/problem.en.tex diff --git a/test/problems/hello/domjudge-problem.ini b/test/problems/hello/domjudge-problem.ini deleted file mode 100644 index d7ce01539..000000000 --- a/test/problems/hello/domjudge-problem.ini +++ /dev/null @@ -1,5 +0,0 @@ -probid='A' -allow_submit='1' -allow_judge='1' -timelimit='3' -color='#FFFFFF' diff --git a/test/problems/hello/problem.yaml b/test/problems/hello/problem.yaml index 50bad5079..a0c9b836c 100644 --- a/test/problems/hello/problem.yaml +++ b/test/problems/hello/problem.yaml @@ -1,26 +1,11 @@ +problem_format_version: 2023-07-draft +type: pass-fail name: hello -author: DOMjudge -# BAPC 2020 -source: -# 2020.bapc.eu -source_url: +credits: + authors: DOMjudge uuid: 323a5d9c-b38a-4110-8483-2846c920c1ee license: unknown rights_owner: -# 'default', 'custom', or 'interactive' -validation: default -# One or more of: -# case_sensitive -# space_change_sensitive -# float_absolute_tolerance eps -# float_relative_tolerance eps -# float_tolerance eps -#validator_flags: - -# To change the time limit factors for Kattis, use: -# limits: -# Time limit is 2*slowest accepted submission: -# time_multiplier: 2 -# Warning for submissions within 1 second of limit: -# time_safety_margin: 1 +limits: + time_limit: 3.0 diff --git a/test/problems/hello/problem_statement/problem.en.tex b/test/problems/hello/statement/problem.en.tex similarity index 100% rename from test/problems/hello/problem_statement/problem.en.tex rename to test/problems/hello/statement/problem.en.tex diff --git a/test/problems/helloproblemtools/domjudge-problem.ini b/test/problems/helloproblemtools/domjudge-problem.ini deleted file mode 100644 index 7f11bbc4f..000000000 --- a/test/problems/helloproblemtools/domjudge-problem.ini +++ /dev/null @@ -1 +0,0 @@ -timelimit='2' diff --git a/test/problems/helloproblemtools/problem.yaml b/test/problems/helloproblemtools/problem.yaml index f3792eab2..89ccbed44 100644 --- a/test/problems/helloproblemtools/problem.yaml +++ b/test/problems/helloproblemtools/problem.yaml @@ -1,3 +1,4 @@ +problem_format_version: 2023-07-draft name: en: Hello World! sv: Hej Världen! @@ -11,3 +12,4 @@ license: public domain # a test submission that goes over this limit.) 
limits: memory: 512 + time_limit: 2.0 diff --git a/test/problems/helloproblemtools/problem_statement/problem.en.tex b/test/problems/helloproblemtools/statement/problem.en.tex similarity index 100% rename from test/problems/helloproblemtools/problem_statement/problem.en.tex rename to test/problems/helloproblemtools/statement/problem.en.tex diff --git a/test/problems/helloproblemtools/problem_statement/problem.sv.tex b/test/problems/helloproblemtools/statement/problem.sv.tex similarity index 100% rename from test/problems/helloproblemtools/problem_statement/problem.sv.tex rename to test/problems/helloproblemtools/statement/problem.sv.tex diff --git a/test/problems/hellounix/.timelimit b/test/problems/hellounix/.timelimit deleted file mode 100644 index 00750edc0..000000000 --- a/test/problems/hellounix/.timelimit +++ /dev/null @@ -1 +0,0 @@ -3 diff --git a/test/problems/hellounix/problem.yaml b/test/problems/hellounix/problem.yaml index eb9a5dfec..31d0bc41c 100644 --- a/test/problems/hellounix/problem.yaml +++ b/test/problems/hellounix/problem.yaml @@ -1,27 +1,14 @@ +problem_format_version: 2023-07-draft +type: pass-fail name: hellounix -author: various +credits: + authors: various # Various tests whose behaviour is only consistent on # unix-y operating systems -source: -source_url: uuid: 20798d22-3227-4e48-9877-7f73d3d3236e license: unknown rights_owner: -validation: default -# One or more of: -# case_sensitive -# space_change_sensitive -# float_absolute_tolerance eps -# float_relative_tolerance eps -# float_tolerance eps -#validator_flags: - -# To change the time limit factors for Kattis, use: -# limits: -# Time limit is 2*slowest accepted submission: -# time_multiplier: 2 -# Warning for submissions within 1 second of limit: -# time_safety_margin: 1 limits: memory: 512 + time_limit: 10.0 diff --git a/test/problems/hellounix/problem_statement/problem.en.tex b/test/problems/hellounix/statement/problem.en.tex similarity index 100% rename from test/problems/hellounix/problem_statement/problem.en.tex rename to test/problems/hellounix/statement/problem.en.tex diff --git a/test/problems/hellowholeworld/problem.yaml b/test/problems/hellowholeworld/problem.yaml index ea8b4d13c..c847c1cce 100644 --- a/test/problems/hellowholeworld/problem.yaml +++ b/test/problems/hellowholeworld/problem.yaml @@ -1,7 +1,9 @@ +problem_format_version: 2023-07-draft +type: pass-fail name: en: Hello, Whole World! de: Hallo, ganze Welt! da: Hej, hele verden! 
-author: Thore Husfeldt +credits: + authors: Thore Husfeldt uuid: c7c28c31-809a-400c-84ae-f6a3b29a217a -validation: default diff --git a/test/problems/hellowholeworld/problem_statement/solution.en.tex b/test/problems/hellowholeworld/solution/solution.en.tex similarity index 100% rename from test/problems/hellowholeworld/problem_statement/solution.en.tex rename to test/problems/hellowholeworld/solution/solution.en.tex diff --git a/test/problems/hellowholeworld/problem_statement/problem.da.tex b/test/problems/hellowholeworld/statement/problem.da.tex similarity index 100% rename from test/problems/hellowholeworld/problem_statement/problem.da.tex rename to test/problems/hellowholeworld/statement/problem.da.tex diff --git a/test/problems/hellowholeworld/problem_statement/problem.en.tex b/test/problems/hellowholeworld/statement/problem.en.tex similarity index 100% rename from test/problems/hellowholeworld/problem_statement/problem.en.tex rename to test/problems/hellowholeworld/statement/problem.en.tex diff --git a/test/problems/hellowholeworld/problem_statement/problem.sv.tex b/test/problems/hellowholeworld/statement/problem.sv.tex similarity index 100% rename from test/problems/hellowholeworld/problem_statement/problem.sv.tex rename to test/problems/hellowholeworld/statement/problem.sv.tex diff --git a/test/problems/identity/.timelimit b/test/problems/identity/.timelimit deleted file mode 100644 index d00491fd7..000000000 --- a/test/problems/identity/.timelimit +++ /dev/null @@ -1 +0,0 @@ -1 diff --git a/test/problems/identity/data/sample/5.ans b/test/problems/identity/data/sample/5.ans new file mode 100644 index 000000000..7ed6ff82d --- /dev/null +++ b/test/problems/identity/data/sample/5.ans @@ -0,0 +1 @@ +5 diff --git a/test/problems/identity/data/sample/5.in b/test/problems/identity/data/sample/5.in new file mode 100644 index 000000000..7ed6ff82d --- /dev/null +++ b/test/problems/identity/data/sample/5.in @@ -0,0 +1 @@ +5 diff --git a/test/problems/identity/data/sample/5.out b/test/problems/identity/data/sample/5.out new file mode 100644 index 000000000..7ed6ff82d --- /dev/null +++ b/test/problems/identity/data/sample/5.out @@ -0,0 +1 @@ +5 diff --git a/test/problems/identity/data/sample/6.ans.statement b/test/problems/identity/data/sample/6.ans.statement new file mode 100644 index 000000000..1e8b31496 --- /dev/null +++ b/test/problems/identity/data/sample/6.ans.statement @@ -0,0 +1 @@ +6 diff --git a/test/problems/identity/data/sample/6.in.statement b/test/problems/identity/data/sample/6.in.statement new file mode 100644 index 000000000..1e8b31496 --- /dev/null +++ b/test/problems/identity/data/sample/6.in.statement @@ -0,0 +1 @@ +6 diff --git a/test/problems/identity/data/sample/test_group.yaml b/test/problems/identity/data/sample/test_group.yaml new file mode 100644 index 000000000..cb6f96a79 --- /dev/null +++ b/test/problems/identity/data/sample/test_group.yaml @@ -0,0 +1 @@ +output_visualizer_args: [--draw-please] diff --git a/test/problems/identity/generators/generators.yaml b/test/problems/identity/generators/generators.yaml index 5b454ee07..92682dcf1 100644 --- a/test/problems/identity/generators/generators.yaml +++ b/test/problems/identity/generators/generators.yaml @@ -1,6 +1,4 @@ solution: /submissions/accepted/author.py -# The visualizer is disabled to speed up testing. 
-#visualizer: /visualizers random_salt: "abc" generators: @@ -67,6 +65,15 @@ data: copy: manual/sample "4": copy: manual/inans + "5": + in: "5" + ans: "5" + out: "5" + "6": + in.statement: "6" + ans.statement: "6" + test_group.yaml: + output_visualizer_args: [--draw-please] secret: data: @@ -75,7 +82,7 @@ data: stdoutpy: stdout.py 200 stdoutcpp: stdout.cpp 201 inans: write_in_and_ans.py 202 - hintdesc: hint_desc.py 203 + hint_desc_yaml: hint_desc_yaml.py 203 main_py: main_py 204 main_c: main_c 205 main_cpp: main_cpp 206 @@ -126,7 +133,6 @@ data: solution: /generators/solution.c generate: random_gen.py {seed:7} testcase_dict_3: - visualizer: generate: random_gen.py {seed:8} unused_args_1: > # Spread arguments over multiple lines. random_gen.py @@ -199,10 +205,10 @@ data: count_group: data: generate: - generate: stdout.py 704 - count: 3 + generate: stdout.py 704 + count: 3 seed: - generate: random_gen.py {seed:10} - count: 3 + generate: random_gen.py {seed:10} + count: 3 unknown_key: diff --git a/test/problems/identity/generators/hint_desc.py b/test/problems/identity/generators/hint_desc_yaml.py similarity index 58% rename from test/problems/identity/generators/hint_desc.py rename to test/problems/identity/generators/hint_desc_yaml.py index 8c503fe59..c842f91b2 100644 --- a/test/problems/identity/generators/hint_desc.py +++ b/test/problems/identity/generators/hint_desc_yaml.py @@ -5,5 +5,4 @@ n = sys.argv[1] Path("testcase.in").write_text(n + "\n") Path("testcase.ans").write_text(n + "\n") -Path("testcase.hint").write_text("hint: " + n + "\n") -Path("testcase.desc").write_text("description: " + n + "\n") +Path("testcase.yaml").write_text("hint: " + n + "\ndescription: " + n + "\n") diff --git a/test/problems/identity/input_visualizer_disabled/run b/test/problems/identity/input_visualizer_disabled/run new file mode 100755 index 000000000..ecc84b0c4 --- /dev/null +++ b/test/problems/identity/input_visualizer_disabled/run @@ -0,0 +1,5 @@ +#!/usr/bin/env sh + +set -e + +cat $1 $2 | asy -f png $(dirname $0)/visualize.asy -o testcase.png diff --git a/test/problems/identity/visualizers/visualize.asy b/test/problems/identity/input_visualizer_disabled/visualize.asy similarity index 100% rename from test/problems/identity/visualizers/visualize.asy rename to test/problems/identity/input_visualizer_disabled/visualize.asy diff --git a/test/problems/identity/output_visualizer_disabled/run b/test/problems/identity/output_visualizer_disabled/run new file mode 100755 index 000000000..d8d0ead16 --- /dev/null +++ b/test/problems/identity/output_visualizer_disabled/run @@ -0,0 +1,13 @@ +#!/usr/bin/env sh + +set -e + +draw=false +for var in "$@" +do + [ "$var" = "--draw-please" ] && draw=true +done + +if [ "$draw" = true ]; then + asy -f png $(dirname $0)/visualize.asy -u infilename="'${1}'" -u ansfilename="'${2}'" -o $3/judgeimage.png +fi diff --git a/test/problems/identity/output_visualizer_disabled/visualize.asy b/test/problems/identity/output_visualizer_disabled/visualize.asy new file mode 100644 index 000000000..dc5bda3b0 --- /dev/null +++ b/test/problems/identity/output_visualizer_disabled/visualize.asy @@ -0,0 +1,20 @@ +defaultpen(1); + +string outvalue = stdin; + +string infilename; +string ansfilename; +usersetting(); +file fin=input(infilename); +file fans=input(infilename); +string invalue = fin; +string ansvalue = fans; + +string label = "\texttt{in}: " + invalue ; +label(scale(5)*label, (0,200)); +string label = "\texttt{ans}: " + ansvalue ; +label(scale(5)*label, (0,100)); +pen labelPen = (invalue 
== outvalue) ? green : red; +string label = "\texttt{out}: " + outvalue ; +label(scale(5)*label, (0,0), p=labelPen); +shipout(bbox(xmargin=5, white, Fill)); diff --git a/test/problems/identity/problem.yaml b/test/problems/identity/problem.yaml index a5170d17a..c1a013182 100644 --- a/test/problems/identity/problem.yaml +++ b/test/problems/identity/problem.yaml @@ -1,24 +1,13 @@ -name: Identity -author: Ragnar Groot Koerkamp -source: -source_url: +problem_format_version: 2023-07-draft +type: pass-fail +name: + en: Identity + de: Identität +credits: + authors: Ragnar Groot Koerkamp uuid: a7d29d67-9b0b-4fd4-ae56-ab2cad5919ab license: unknown rights_owner: -# 'default', 'custom', or 'interactive' -validation: default -# One or more of: -# case_sensitive -# space_change_sensitive -# float_absolute_tolerance eps -# float_relative_tolerance eps -# float_tolerance eps -#validator_flags: - -# To change the time limit factors for Kattis, use: -# limits: -# Time limit is 2*slowest accepted submission: -# time_multiplier: 2 -# Warning for submissions within 1 second of limit: -# time_safety_margin: 1 +limits: + time_limit: 1.0 diff --git a/test/problems/identity/problem_slide/problem-slide.de.tex b/test/problems/identity/problem_slide/problem-slide.de.tex new file mode 100644 index 000000000..164a1f0d3 --- /dev/null +++ b/test/problems/identity/problem_slide/problem-slide.de.tex @@ -0,0 +1,10 @@ +\newcommand{\maxn}{1000} + +\begin{frame} + \frametitle{\problemtitle} + + \begin{itemize} + \item Gegeben ein Integer $0\leq n\leq \maxn$. + \item Gebe eine Zeile mit $n$ aus. + \end{itemize} +\end{frame} diff --git a/test/problems/identity/problem_statement/problem-slide.en.tex b/test/problems/identity/problem_slide/problem-slide.en.tex similarity index 100% rename from test/problems/identity/problem_statement/problem-slide.en.tex rename to test/problems/identity/problem_slide/problem-slide.en.tex diff --git a/test/problems/identity/solution/solution.de.tex b/test/problems/identity/solution/solution.de.tex new file mode 100644 index 000000000..201b90853 --- /dev/null +++ b/test/problems/identity/solution/solution.de.tex @@ -0,0 +1,8 @@ +% this file is intentionally missing +\begin{frame} + \frametitle{\problemtitle} + \begin{itemize} + \item Gebe $4$ aus. + \solvestats + \end{itemize} +\end{frame} diff --git a/test/problems/identity/problem_statement/solution.en.tex b/test/problems/identity/solution/solution.en.tex similarity index 100% rename from test/problems/identity/problem_statement/solution.en.tex rename to test/problems/identity/solution/solution.en.tex diff --git a/test/problems/identity/statement/problem.de.tex b/test/problems/identity/statement/problem.de.tex new file mode 100644 index 000000000..03d9cbcea --- /dev/null +++ b/test/problems/identity/statement/problem.de.tex @@ -0,0 +1,22 @@ +\problemname{} + +\newcommand{\maxn}{1000} + +Gegeben $n$, gebe $n$ aus. + +\begin{Input} + Die Eingabe besteht aus: + \begin{itemize} + \item Einer Zeile mit einem Integer $0\leq n\leq \maxn$. + \end{itemize} +\end{Input} + +\begin{Output} + Gebe eine Zeile mit $n$ aus. +\end{Output} + +\nextsample{} +Dieser Text steht hinter dem ersten Beispiel. + +\remainingsamples{} +Dieser Text steht hinter allen Beispielen. 
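The problem.yaml rewrites throughout this patch all follow the same migration pattern; a rough sketch of the new header, with field names as used in the diffs and every value a purely illustrative placeholder, is:

problem_format_version: 2023-07-draft
type: pass-fail
name:
  en: Example Problem
credits:
  authors: Jane Doe
source:
  name: Example Contest
uuid: 00000000-0000-0000-0000-000000000000
license: cc by-sa
limits:
  time_limit: 1.0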
diff --git a/test/problems/identity/problem_statement/problem.en.tex b/test/problems/identity/statement/problem.en.tex similarity index 100% rename from test/problems/identity/problem_statement/problem.en.tex rename to test/problems/identity/statement/problem.en.tex diff --git a/test/problems/identity/visualizers/run b/test/problems/identity/visualizers/run deleted file mode 100755 index b82da4c8d..000000000 --- a/test/problems/identity/visualizers/run +++ /dev/null @@ -1,6 +0,0 @@ -#!/usr/bin/env sh - -set -e - -name=$1 -cat $name.in $name.ans | asy -f png $(dirname $0)/visualize.asy -o $name.png diff --git a/test/problems/interactivemultipass/output_validators/interactive_multipass_validator/interctive_multipass_validator.py b/test/problems/interactivemultipass/output_validator/interctive_multipass_validator.py similarity index 100% rename from test/problems/interactivemultipass/output_validators/interactive_multipass_validator/interctive_multipass_validator.py rename to test/problems/interactivemultipass/output_validator/interctive_multipass_validator.py diff --git a/test/problems/interactivemultipass/problem.yaml b/test/problems/interactivemultipass/problem.yaml index 184568f32..c71be23ed 100644 --- a/test/problems/interactivemultipass/problem.yaml +++ b/test/problems/interactivemultipass/problem.yaml @@ -1,8 +1,10 @@ +problem_format_version: 2023-07-draft +type: + - interactive + - multi-pass name: interactive multi-pass -author: Michael Zündorf -source: -source_url: +credits: + authors: Michael Zündorf uuid: 42c9ed2e-f579-46ac-ae2c-191069f3df70 license: unknown rights_owner: -validation: custom interactive multi-pass diff --git a/test/problems/interactivemultipass/problem_statement/problem.en.tex b/test/problems/interactivemultipass/statement/problem.en.tex similarity index 100% rename from test/problems/interactivemultipass/problem_statement/problem.en.tex rename to test/problems/interactivemultipass/statement/problem.en.tex diff --git a/test/problems/multipass/answer_validators/validate.ctd b/test/problems/multipass/answer_validators/validate.ctd new file mode 100644 index 000000000..1a2b1dc14 --- /dev/null +++ b/test/problems/multipass/answer_validators/validate.ctd @@ -0,0 +1 @@ +EOF diff --git a/test/problems/multipass/output_validators/multipass_validator/multipass_validator.py b/test/problems/multipass/output_validator/multipass_validator.py similarity index 100% rename from test/problems/multipass/output_validators/multipass_validator/multipass_validator.py rename to test/problems/multipass/output_validator/multipass_validator.py diff --git a/test/problems/multipass/problem.yaml b/test/problems/multipass/problem.yaml index 0cf04635e..500eb82bc 100644 --- a/test/problems/multipass/problem.yaml +++ b/test/problems/multipass/problem.yaml @@ -1,8 +1,8 @@ +problem_format_version: 2023-07-draft +type: multi-pass name: multi-pass -author: Michael Zündorf -source: -source_url: +credits: + authors: Michael Zündorf uuid: 71076b69-e9c2-4227-ba54-ec3d4e277c78 license: unknown rights_owner: -validation: custom multi-pass diff --git a/test/problems/multipass/problem_statement/problem.en.tex b/test/problems/multipass/statement/problem.en.tex similarity index 100% rename from test/problems/multipass/problem_statement/problem.en.tex rename to test/problems/multipass/statement/problem.en.tex diff --git a/test/problems/problems.yaml b/test/problems/problems.yaml index 02df538df..13a3d923a 100644 --- a/test/problems/problems.yaml +++ b/test/problems/problems.yaml @@ -1,2 +1,7 @@ - id: identity 
label: A + name: + en: Identity + de: Identität + rgb: '#000000' + time_limit: 1.0 diff --git a/test/problems/solve_stats/activity/B.pdf b/test/problems/solve_stats/activity/B.pdf new file mode 100644 index 000000000..8389db7e9 Binary files /dev/null and b/test/problems/solve_stats/activity/B.pdf differ diff --git a/test/problems/test_problem_config/domjudge-problem.ini b/test/problems/test_problem_config/domjudge-problem.ini deleted file mode 100644 index 04ad55730..000000000 --- a/test/problems/test_problem_config/domjudge-problem.ini +++ /dev/null @@ -1 +0,0 @@ -timelimit = '3' diff --git a/test/problems/test_problem_config/problem.yaml b/test/problems/test_problem_config/problem.yaml deleted file mode 100644 index 537c2d800..000000000 --- a/test/problems/test_problem_config/problem.yaml +++ /dev/null @@ -1,3 +0,0 @@ -name: 'ABC XYZ' -validation: 'custom' -uuid: 58c89b2d-616c-4291-ab8a-710b4e6cb978 diff --git a/test/problems/testproblemconfig/output_validator/.gitkeep b/test/problems/testproblemconfig/output_validator/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/test/problems/testproblemconfig/problem.yaml b/test/problems/testproblemconfig/problem.yaml new file mode 100644 index 000000000..e5342f032 --- /dev/null +++ b/test/problems/testproblemconfig/problem.yaml @@ -0,0 +1,6 @@ +problem_format_version: 2023-07-draft +type: pass-fail +name: 'ABC XYZ' +uuid: 58c89b2d-616c-4291-ab8a-710b4e6cb978 +limits: + time_limit: 3.0 diff --git a/test/problems/test_problem_config/problem_statement/problem.en.tex b/test/problems/testproblemconfig/statement/problem.en.tex similarity index 100% rename from test/problems/test_problem_config/problem_statement/problem.en.tex rename to test/problems/testproblemconfig/statement/problem.en.tex diff --git a/test/test_default_output_validator.py b/test/test_default_output_validator.py index 9c7286726..fae741e87 100644 --- a/test/test_default_output_validator.py +++ b/test/test_default_output_validator.py @@ -13,7 +13,7 @@ RUN_DIR = Path.cwd().resolve() # Note: the python version isn't tested by default, because it's quite slow. 
-DEFAULT_OUTPUT_VALIDATORS = ["default_output_validator.cpp"] +DEFAULT_OUTPUT_VALIDATOR = ["default_output_validator.cpp"] config.args.verbose = 2 config.args.error = True @@ -42,7 +42,7 @@ def read_tests(): return tests -@pytest.fixture(scope="class", params=DEFAULT_OUTPUT_VALIDATORS) +@pytest.fixture(scope="class", params=DEFAULT_OUTPUT_VALIDATOR) def validator(request): problem_dir = RUN_DIR / "test/problems/identity" os.chdir(problem_dir) @@ -65,11 +65,11 @@ class MockRun: @pytest.mark.usefixtures("validator") -class TestDefaultOutputValidators: - @pytest.mark.parametrize("testdata", read_tests()) - def test_default_output_validators(self, validator, testdata): +class TestDefaultOutputValidator: + @pytest.mark.parametrize("test_data", read_tests()) + def test_default_output_validator(self, validator, test_data): problem, validator = validator - flags, ans, out, exp = testdata + flags, ans, out, exp = test_data flags = flags.split() (problem.tmpdir / "data").mkdir(exist_ok=True, parents=True) @@ -87,11 +87,11 @@ def test_default_output_validators(self, validator, testdata): r.out_path = out_path r.feedbackdir = problem.tmpdir / "data" - # TODO: the validators should probably be able to figure the flags out from the Problem config + # TODO: the validator should probably be able to figure the flags out from the Problem config result = validator.run(t, r, args=flags) if result.status != exp: - print(testdata) + print(test_data) for k in vars(result): print(k, " -> ", getattr(result, k)) assert result.status == exp diff --git a/test/test_generators_yaml.py b/test/test_generators_yaml.py index cac69d8f1..480742a31 100644 --- a/test/test_generators_yaml.py +++ b/test/test_generators_yaml.py @@ -10,11 +10,19 @@ config.set_default_args() +class MockSettings: + def __init__(self): + self.constants = {} + + class MockProblem: def __init__(self): self.path = Path(".") self._program_callbacks = dict() self._rules_cache = dict() + self.settings = MockSettings() + self.interactive = False + self.multi_pass = False class MockGeneratorConfig(generate.GeneratorConfig): @@ -22,25 +30,27 @@ def __init__(self, problem, restriction=None): self.problem = problem self.n_parse_error = 0 - # A map of paths `secret/testgroup/testcase` to their canonical TestcaseRule. + # A map of paths `secret/test_group/test_case` to their canonical TestcaseRule. # For generated cases this is the rule itself. - # For included cases, this is the 'resolved' location of the testcase that is included. + # For included cases, this is the 'resolved' location of the test case that is included. self.known_cases = dict() - # A set of paths `secret/testgroup`. + # A set of paths `secret/test_group`. # Used for cleanup. self.known_directories = dict() # Used for cleanup self.known_files = set() - # A map from key to (is_included, list of testcases and directories), + # A map from key to (is_included, list of test cases and directories), # used for `include` statements. self.known_keys = collections.defaultdict(lambda: [False, []]) # A set of testcase rules, including seeds. self.rules_cache = dict() - # The set of generated testcases keyed by testdata. + # The set of generated test cases keyed by hash(test_case). # Used to delete duplicated unlisted cases. 
- self.generated_testdata = dict() + self.generated_test_cases = dict() # Path to the trash directory for this run - self.trashdir = None + self.trash_dir = None + # Set of hash(.in) for all generated testcases + self.hashed_in = set() # Files that should be processed self.restriction = restriction diff --git a/test/test_problem_yaml.py b/test/test_problem_yaml.py index 4f6b4117c..0b90b799a 100644 --- a/test/test_problem_yaml.py +++ b/test/test_problem_yaml.py @@ -54,20 +54,20 @@ class MockProblem: class TestProblemYaml: - @pytest.mark.parametrize("testdata", read_tests("valid")) - def test_valid(self, testdata): + @pytest.mark.parametrize("test_data", read_tests("valid")) + def test_valid(self, test_data): config.n_error = 0 config.n_warn = 0 - p = problem.ProblemSettings(testdata["yaml"], cast(problem.Problem, MockProblem())) + p = problem.ProblemSettings(test_data["yaml"], cast(problem.Problem, MockProblem())) assert config.n_error == 0 and config.n_warn == 0, ( f"Expected zero errors and warnings, got {config.n_error} and {config.n_warn}" ) - if "eq" in testdata: - assert_equal(p, testdata["eq"]) + if "eq" in test_data: + assert_equal(p, test_data["eq"]) - @pytest.mark.parametrize("testdata", read_tests("invalid")) - def test_invalid(self, monkeypatch, testdata): + @pytest.mark.parametrize("test_data", read_tests("invalid")) + def test_invalid(self, monkeypatch, test_data): config.n_error = 0 config.n_warn = 0 @@ -85,16 +85,16 @@ def test_invalid(self, monkeypatch, testdata): ) try: - problem.ProblemSettings(testdata["yaml"], cast(problem.Problem, MockProblem())) + problem.ProblemSettings(test_data["yaml"], cast(problem.Problem, MockProblem())) except SystemExit as e: assert e.code == -42 - assert ([call(testdata["fatal"])] if "fatal" in testdata else []) == fatal.mock_calls + assert ([call(test_data["fatal"])] if "fatal" in test_data else []) == fatal.mock_calls - if isinstance(testdata.get("error", None), str): - testdata["error"] = [testdata["error"]] - assert [call(x) for x in testdata.get("error", [])] == error.mock_calls + if isinstance(test_data.get("error", None), str): + test_data["error"] = [test_data["error"]] + assert [call(x) for x in test_data.get("error", [])] == error.mock_calls - if isinstance(testdata.get("warn", None), str): - testdata["warn"] = [testdata["warn"]] - assert [call(x) for x in testdata.get("warn", [])] == warn.mock_calls + if isinstance(test_data.get("warn", None), str): + test_data["warn"] = [test_data["warn"]] + assert [call(x) for x in test_data.get("warn", [])] == warn.mock_calls diff --git a/test/test_problems.py b/test/test_problems.py index a409e8ebb..9be303627 100644 --- a/test/test_problems.py +++ b/test/test_problems.py @@ -2,6 +2,7 @@ import os import io from pathlib import Path +from zipfile import ZipFile import tools import problem @@ -19,11 +20,9 @@ "divsort", "interactivemultipass", "multipass", + "constants", ] + ["hellounix" if not util.is_mac() and not util.is_windows() else []] -# Run various specific commands on this problem. 
-IDENTITY_PROBLEMS = ["identity"]
-
RUN_DIR = Path.cwd().resolve()
@@ -43,6 +42,40 @@ def test_problem(self):
        tools.test(["run"])
+@pytest.fixture(scope="class")
+def setup_constants_problem(request):
+    problem_dir = RUN_DIR / "test/problems/constants"
+    os.chdir(problem_dir)
+    try:
+        tools.test(["tmp", "--clean"])
+        yield
+    finally:
+        tools.test(["tmp", "--clean"])
+        os.chdir(RUN_DIR)
+
+
+@pytest.mark.usefixtures("setup_constants_problem")
+class TestConstantsProblem:
+    def test_generate(self):
+        tools.test(["generate"])
+
+    def test_pdf(self):
+        tools.test(["pdf"])
+
+    def test_solutions(self):
+        tools.test(["solutions"])
+
+    def test_problem_slides(self):
+        tools.test(["problem_slides"])
+
+    def test_validate(self):
+        tools.test(["validate"])
+
+    def test_zip(self):
+        tools.test(["zip", "--force"])
+        Path("constants.zip").unlink()
+
+
@pytest.fixture(scope="class")
def setup_identity_problem(request):
    problem_dir = RUN_DIR / "test/problems/identity"
@@ -111,11 +144,61 @@ def test_constraints(self):
    # Exporting
    def test_samplezip(self):
        tools.test(["samplezip"])
-        Path("samples.zip").unlink()
+        zip_path = Path("samples.zip")
+
+        # The sample zip should contain exactly one .in and one .ans file per sample.
+        assert sorted(
+            (info.filename, info.file_size)
+            for info in ZipFile(zip_path).infolist()
+            if info.filename.startswith("A/")
+        ) == [
+            (f"A/{i}.{ext}", size)
+            for i, size in enumerate([2, 4, 2, 5, 2, 2], start=1)
+            for ext in ["ans", "in"]
+        ], "Sample zip contents are not correct"
+
+        zip_path.unlink()
    def test_zip(self):
+        zip_path = Path("identity.zip")
+
        tools.test(["zip", "--force"])
-        Path("identity.zip").unlink()
+
+        # The full zip should contain the samples with the original file extensions.
+        assert sorted(
+            (info.filename, info.file_size)
+            for info in ZipFile(zip_path).infolist()
+            if info.filename.startswith("identity/data/sample/")
+        ) == [
+            *(
+                (f"identity/data/sample/{i}.{ext}", size)
+                for i, size in enumerate([2, 4, 2, 5], start=1)
+                for ext in ["ans", "in"]
+            ),
+            *((f"identity/data/sample/5.{ext}", 2) for ext in ["ans", "in", "out"]),
+            *((f"identity/data/sample/6.{ext}.statement", 2) for ext in ["ans", "in"]),
+        ], "Zip contents for data/sample/ are not correct"
+
+        # The full zip should contain all PDFs in their corresponding directories.
+        assert sorted(
+            info.filename for info in ZipFile(zip_path).infolist() if info.filename.endswith(".pdf")
+        ) == [
+            f"identity/{path}.{lang}.pdf"
+            for path in ["problem_slide/problem-slide", "solution/solution", "statement/problem"]
+            for lang in ["de", "en"]
+        ], "Zip contents for PDFs with both languages are not correct"
+
+        tools.test(["zip", "--force", "--lang", "en"])
+
+        # With `--lang en`, the zip should only contain the English PDFs.
+        assert sorted(
+            info.filename for info in ZipFile(zip_path).infolist() if info.filename.endswith(".pdf")
+        ) == [
+            f"identity/{path}.en.pdf"
+            for path in ["problem_slide/problem-slide", "solution/solution", "statement/problem"]
+        ], "Zip contents for PDFs with `--lang en` are not correct"
+
+        zip_path.unlink()
    # Misc
    # def test_all(self): tools.test(['all'])
@@ -171,6 +254,34 @@ def test_problem_slides(self):
    def test_gitlabci(self):
        tools.test(["gitlabci"])
+    def test_zip(self):
+        zip_path = Path("problems.zip")
+
+        for languages in [["en", "de"], ["en"]]:
+            tools.test(["zip", "--force", "--lang", *languages])
+
+            # The contest zip should contain exactly the expected files, with PDFs only for the selected languages.
+            assert sorted(info.filename for info in ZipFile(zip_path).infolist()) == sorted(
+                [
+                    "contest.yaml",
+                    "identity.zip",
+                    "problems.yaml",
+                    "samples.zip",
+                    *(
+                        f"{name}{suffix}.{lang}.pdf"
+                        for name in ["contest", "solutions", "problem-slides"]
+                        for lang in languages
+                        for suffix in ["", "-web"]
+                        # The problem slides do not have a -web version.
+                        if (name, suffix) != ("problem-slides", "-web")
+                    ),
+                ]
+            ), f"Zip contents for contest zip are not correct for languages {languages}"
+
+        zip_path.unlink()
+        Path("identity/identity.zip").unlink()
+        Path("samples.zip").unlink()
+
@pytest.fixture(scope="function")
def tmp_contest_dir(tmp_path):
@@ -215,7 +326,7 @@ def test_new_contest_problem(self, monkeypatch):
class TestReadProblemConfig:
    def test_read_problem_config(self):
-        p = problem.Problem(RUN_DIR / "test/problems/test_problem_config", Path("/tmp/xyz"))
+        p = problem.Problem(RUN_DIR / "test/problems/testproblemconfig", Path("/tmp/xyz"))
        assert p.settings.name["en"] == "ABC XYZ"
        assert p.custom_output and not p.interactive and not p.multi_pass
        assert p.limits.time_limit == 3.0
diff --git a/test/test_verdicts.py b/test/test_verdicts.py
index dafa57cb2..f81aff3c0 100644
--- a/test/test_verdicts.py
+++ b/test/test_verdicts.py
@@ -77,5 +77,5 @@ def test_slowest_testcase(self):
        verds.set("secret/a/1", "TLE", 2.9)
        verds.set("secret/a/2", "RTE", 3.5)
        verds.set("secret/a/3", "TLE", 3.2)
-        assert verds.salient_testcase() == ("secret/a/1", 2.9)
-        assert verds.slowest_testcase() == ("secret/a/2", 3.5)
+        assert verds.salient_test_case() == ("secret/a/1", 2.9)
+        assert verds.slowest_test_case() == ("secret/a/2", 3.5)
diff --git a/test/yaml/generators/invalid_yaml/bad_generators.yaml b/test/yaml/generators/invalid_yaml/bad_generators.yaml
index 7c1f24525..2ce4851af 100644
--- a/test/yaml/generators/invalid_yaml/bad_generators.yaml
+++ b/test/yaml/generators/invalid_yaml/bad_generators.yaml
@@ -23,9 +23,6 @@ solution: true
---
solution: false
---
-# visualizer must be null or string
-visualizer: 0
----
# random_salt must be null or string
random_salt: 0
---
@@ -164,21 +161,13 @@ data:
data:
  ab: /generators/dir/gen.py
---
-# Solution ans visualizer must have an absolute path:
+# Solution must have an absolute path:
solution: a
---
solution: a/b
---
solution: a 1 2
---
-visualizer: a
----
-visualizer: a/b
----
-visualizer: a 1 2
----
-visualizer: a {name}
----
# Directories may not have generate:.
generate: xyz
---
diff --git a/test/yaml/generators/invalid_yaml/invalid.generators.yaml b/test/yaml/generators/invalid_yaml/invalid.generators.yaml
index b78f8e92a..6fc27a90b 100644
--- a/test/yaml/generators/invalid_yaml/invalid.generators.yaml
+++ b/test/yaml/generators/invalid_yaml/invalid.generators.yaml
@@ -7,7 +7,7 @@ data: {sample: {data: []}}
# missing sample:
data: {secret: {data: []}}
---
-# invalid testgroup below root
+# invalid test group below root
data: {sample: {data: []}, secret: {data: []}, public: {data: []}}
---
# solution must be null or string
@@ -29,10 +29,6 @@ data: {sample: {data: []}, secret: {data: []}}
solution: false
data: {sample: {data: []}, secret: {data: []}}
---
-# visualizer must be null or string
-visualizer: 0
-data: {sample: {data: []}, secret: {data: []}}
----
# random_salt must be null or string
random_salt: 0
data: {sample: {data: []}, secret: {data: []}}
@@ -266,7 +262,7 @@ data:
  a:
    generate: /generators/gen.py
---
-# Solution and visualizer must have an absolute path:
+# Solution must have an absolute path:
solution: a
data: {sample: {data: []}, secret: {data: []}}
---
@@ -276,23 +272,11 @@ data: {sample: {data: []}, secret: {data: []}}
solution: a 1 2
data: {sample: {data: []}, secret: {data: []}}
---
-visualizer: a
-data: {sample: {data: []}, secret: {data: []}}
----
-visualizer: a/b
-data: {sample: {data: []}, secret: {data: []}}
----
-visualizer: a 1 2
-data: {sample: {data: []}, secret: {data: []}}
----
-visualizer: a {name}
-data: {sample: {data: []}, secret: {data: []}}
----
## No toplevel generate TODO
#generate: xyz
#data: {sample: {data: []}, secret: {data: []}}
#---
-# No testgroup generate
+# No generate in test group
data:
  sample: {data: []}
  secret:
@@ -397,7 +381,7 @@ data:
    generate: my_generator {count}
    count: 101
---
-# No testdata.yaml on testcase level
+# No test_group.yaml on testcase level
# TODO Not picked up by JSON schema
data:
  sample: {}
@@ -405,6 +389,5 @@ data:
    data:
      - '':
          in: '1 2'
-          visualizer: "/ab/c" # this is fine
-          testdata.yaml: # this is not
-            input_validator_args: "connected"
+          test_group.yaml: # this is not ok
+            input_validator_args: [connected]
diff --git a/test/yaml/generators/test_schemata.sh b/test/yaml/generators/test_schemata.sh
index 93c3ee6ad..59a64382d 100644
--- a/test/yaml/generators/test_schemata.sh
+++ b/test/yaml/generators/test_schemata.sh
@@ -1,5 +1,7 @@
# Validate all valid generator YAML found in the following dirs agains the CUE schema:
+cd "$(dirname "$0")"
+
all_valid_yaml=(../../../doc ../../../skel/problem ../../problems valid_yaml)
# Arguments
@@ -20,7 +22,10 @@
trap "rm -rf $SNIPPETS_DIR" EXIT
for dir in "${all_valid_yaml[@]}"; do
    for file in $(find "$dir" -type f -name '*generators.yaml'); do
        echo -n "cue vet "$file" $schemadir/*.cue -d \"#Generators\" "
-        output_cue=$(cue vet "$file" $schemadir/*.cue -d "#Generators" 2>&1)
+        tmp="$(mktemp --suffix .yaml)"
+        sed "s/{%test_group_yaml_comment%}/#/" "$file" | sed "s/{%output_validator_args%}//" > "$tmp"
+        output_cue=$(cue vet "$tmp" $schemadir/*.cue -d "#Generators" 2>&1)
        exit_code_cue=$?
+        rm "$tmp"
        if [ $exit_code_cue -eq 0 ]; then
            echo -n -e "\033[0;32mOK(cue)\033[0m"
@@ -69,6 +74,11 @@
done
# Run `cue vet` on each invalid yaml file and snippet
for snippet in "$SNIPPETS_DIR"/*.yaml; do
+    if ! grep -q '^[^#]' "$snippet"; then
+        # TODO: empty generators.yaml files _should_ be invalid, but for some reason, the CI currently disagrees.
+ echo "Skipping empty $(basename $snippet)" + continue + fi echo -n "Invalidating $(basename $snippet) " snippet_failed=0 cue vet "$snippet" $schemadir/*.cue -d "#Generators" > /dev/null 2>&1 diff --git a/test/yaml/generators/valid_yaml/rich-generators.yaml b/test/yaml/generators/valid_yaml/rich-generators.yaml index b2f5d743f..26d62cbbd 100644 --- a/test/yaml/generators/valid_yaml/rich-generators.yaml +++ b/test/yaml/generators/valid_yaml/rich-generators.yaml @@ -21,18 +21,24 @@ data: 'explicit': in: "-1 2" ans: "1" - desc: "Negative numbers" - hint: "Remember that a can be negative" + yaml: + description: "Negative numbers" + hint: "Remember that a can be negative" 'curlies': my_generator {seed:1} --name {name} 'morecurlies': generate: my_generator {seed:1} --name {name} --ctr {count} --arg {count} count: 5 - 'group_with_testdata': - testdata.yaml: - input_validator_args: "--connected --max_n 2000" - visualizer: "/foo/bar/baz" + 'group_with_test_group_yaml': + test_group.yaml: + input_validator_args: [--connected, --max_n, "2000"] data: 'a': my_generator + # A test case may not be called 'test_group', but it may appear as part of the name. + 'test_group_': my_generator + 'group_with_numbers': + data: + # A numbered test case may be called 'X-test-group'. + - 'test_group': my_generator invalid_input: data: - '': diff --git a/test/yaml/problem/invalid.yaml b/test/yaml/problem/invalid.yaml index 630e52b05..161b3f57a 100644 --- a/test/yaml/problem/invalid.yaml +++ b/test/yaml/problem/invalid.yaml @@ -1,6 +1,7 @@ --- # Unknown keys yaml: + problem_format_version: 2023-07-draft mumbo: jumbo warn: "found unknown problem.yaml key: mumbo in root" --- @@ -11,23 +12,41 @@ yaml: warn: "found unknown problem.yaml key: mumbo in `credits`" --- yaml: + problem_format_version: 2023-07-draft limits: mumbo: jumbo warn: "found unknown problem.yaml key: mumbo in `limits`" --- yaml: + problem_format_version: 2023-07-draft limits: time_multipliers: mumbo: jumbo warn: "found unknown problem.yaml key: mumbo in `limits.time_multipliers`" +--- +# UUID +yaml: + problem_format_version: 2023-07-draft + name: Invalid UUID, too short + uuid: 12345678-abcd +warn: "invalid uuid: 12345678-abcd" +--- +yaml: + problem_format_version: 2023-07-draft + name: Invalid UUID, not hexadecimal + uuid: 12345678-abcd-efgh-ijkl-12345678 +warn: "invalid uuid: 12345678-abcd-efgh-ijkl-12345678" + --- # Name yaml: + problem_format_version: 2023-07-draft name: 42 warn: incompatible value for key 'name' in problem.yaml. SKIPPED. --- yaml: + problem_format_version: 2023-07-draft name: en: 42 warn: incompatible value for key 'en' in problem.yaml. SKIPPED. @@ -35,9 +54,11 @@ warn: incompatible value for key 'en' in problem.yaml. SKIPPED. --- # Validation/type yaml: + problem_format_version: 2023-07-draft name: Incorrect validation validation: mumbo-jumbo -fatal: "problem.yaml: unrecognized validation mode mumbo-jumbo." +warn: + - "key 'validation' is deprecated, use 'type' instead. SKIPPED." --- yaml: problem_format_version: 2023-07-draft @@ -50,7 +71,7 @@ yaml: name: Deprecated validation validation: interactive warn: - - "problem.yaml: 'validation' is removed in 2023-07-draft, please use 'type' instead. SKIPPED." + - "key 'validation' is deprecated, use 'type' instead. SKIPPED." 
---
yaml:
  problem_format_version: 2023-07-draft
@@ -78,3 +99,99 @@ yaml:
  name: Incorrect type (dict)
  type: 42
fatal: "problem.yaml: 'type' must be a string or a sequence"
+
+---
+# Limits
+yaml:
+  problem_format_version: 2023-07-draft
+  name: Negative time limit
+  limits:
+    time_limit: -1
+warn: "value for 'time_limit' in problem.yaml should be > 0 but is -1.0. SKIPPED."
+---
+yaml:
+  problem_format_version: 2023-07-draft
+  name: Time multiplier < 1
+  limits:
+    time_multipliers:
+      ac_to_time_limit: 0.9
+warn: "value for 'ac_to_time_limit' in problem.yaml should be >= 1 but is 0.9. SKIPPED."
+---
+yaml:
+  problem_format_version: 2023-07-draft
+  name: Only one pass for multi-pass
+  type: multi-pass
+  limits:
+    validation_passes: 1
+warn: "value for 'validation_passes' in problem.yaml should be >= 2 but is 1. SKIPPED."
+---
+yaml:
+  problem_format_version: 2023-07-draft
+  name: Fractional passes for multi-pass
+  type: multi-pass
+  limits:
+    validation_passes: 2.5
+warn: "incompatible value for key 'validation_passes' in problem.yaml. SKIPPED."
+---
+yaml:
+  problem_format_version: 2023-07-draft
+  name: validation_passes for non-multi-pass problem
+  limits:
+    validation_passes: 3
+warn: "limit: validation_passes is only used for multi-pass problems. SKIPPED."
+
+---
+# Empty list
+yaml:
+  problem_format_version: 2023-07-draft
+  name: pass-fail type from empty type
+  type: []
+warn: "value for 'type' in problem.yaml should not be an empty list."
+---
+yaml:
+  problem_format_version: 2023-07-draft
+  name: Empty list
+  keywords: []
+warn: "value for 'keywords' in problem.yaml should not be an empty list."
+
+---
+# Credits
+yaml:
+  problem_format_version: 2023-07-draft
+  name: Cannot specify multiple authors in credits
+  credits:
+    - name: Alice
+    - name: Audrey Authorson
+      email: bob@foo.bar
+warn: "incompatible value for key 'credits' in problem.yaml. SKIPPED."
+---
+yaml:
+  problem_format_version: 2023-07-draft
+  name: Author name must be a string
+  credits:
+    authors:
+      - name: 42
+warn: "incompatible value for key 'name' in problem.yaml. SKIPPED."
+
+---
+# Source
+yaml:
+  problem_format_version: 2023-07-draft
+  name: Source must have a name
+  source:
+    - url: https://2024.nwerc.example/contest
+warn: "problem.yaml: 'name' is required in source"
+
+---
+# Embargo
+yaml:
+  problem_format_version: 2023-07-draft
+  name: Embargo is not a date
+  embargo_until: not a date
+warn: "incompatible value for key 'embargo_until' in problem.yaml. SKIPPED."
+#---
+#yaml:
+#  problem_format_version: 2023-07-draft
+#  name: Embargo date does not exist
+#  embargo_until: 2025-02-29
+# Note that this cannot be tested in this way, because the YAML parser already throws an error.
diff --git a/test/yaml/problem/valid.yaml b/test/yaml/problem/valid.yaml
index bdcea9de7..6a773a2e9 100644
--- a/test/yaml/problem/valid.yaml
+++ b/test/yaml/problem/valid.yaml
@@ -1,30 +1,35 @@
---
# Problem name tests
yaml:
+  problem_format_version: 2023-07-draft
  name: Minimal
eq:
+  problem_format_version: 2023-07-draft
  name:
    en: Minimal
---
yaml:
+  problem_format_version: 2023-07-draft
  name:
    en: Minimal
---
yaml:
+  problem_format_version: 2023-07-draft
  name:
    en: Minimal
    nl: Minimaal
-
---
-# Problem validation/type tests
yaml:
-  name: custom validation
-  validation: custom
-eq:
-  custom_output: True
-  interactive: False
-  multi_pass: False
+  problem_format_version: 2023-07-draft
+  name:
+    en: Hello World!
+    pt-BR: Olá mundo!
+    pt-PT: Oi mundo!
+    fil: Kumusta mundo!
+    gsw-u-sd-chzh: Sali Zämme, Wäut!
+
---
+# Problem type tests
yaml:
  problem_format_version: 2023-07-draft
  name: pass-fail type
@@ -42,15 +47,6 @@ eq:
  interactive: False
  multi_pass: False
---
-yaml:
-  problem_format_version: 2023-07-draft
-  name: pass-fail type from empty type
-  type: []
-eq:
-  custom_output: False
-  interactive: False
-  multi_pass: False
----
yaml:
  problem_format_version: 2023-07-draft
  name: interactive type
@@ -90,7 +86,8 @@ eq:
---
# Credits tests
yaml:
-  author: A. U. Thor
+  problem_format_version: 2023-07-draft
+  credits: A. U. Thor
eq:
  credits:
    authors:
@@ -98,7 +95,8 @@ eq:
      email: ~
---
yaml:
-  author: A. U. Thor
+  problem_format_version: 2023-07-draft
+  credits: A. U. Thor
eq:
  credits:
    authors:
@@ -107,16 +105,8 @@ eq:
---
yaml:
  problem_format_version: 2023-07-draft
-  credits: A. U. Thor
-eq:
  credits:
-    authors:
-      - name: A. U. Thor
-        email: ~
----
-yaml:
-  problem_format_version: 2023-07-draft
-  credits: A. U. Thor
+    authors: A. U. Thor
eq:
  credits:
    authors:
@@ -125,8 +115,6 @@ eq:
---
yaml:
  problem_format_version: 2023-07-draft
-  credits: A. U. Thor
-eq:
  credits:
    authors:
      - name: A. U. Thor
@@ -151,3 +139,109 @@ eq:
    en:
      - name: T. R. Anslator
        email: translator@example.com
+
+---
+# Source tests
+yaml:
+  problem_format_version: 2023-07-draft
+  name: Source can be just a string
+  source: NWERC 2024
+eq:
+  source:
+    - name: NWERC 2024
+      url: ~
+---
+yaml:
+  problem_format_version: 2023-07-draft
+  name: Source can be a map
+  source:
+    name: NWERC 2024
+eq:
+  source:
+    - name: NWERC 2024
+      url: ~
+---
+yaml:
+  problem_format_version: 2023-07-draft
+  name: Source can be a map with two keys (name, url)
+  source:
+    name: NWERC 2024
+    url: https://2024.nwerc.example/contest
+eq:
+  source:
+    - name: NWERC 2024
+      url: https://2024.nwerc.example/contest
+---
+yaml:
+  problem_format_version: 2023-07-draft
+  name: Many sources can be specified
+  source:
+    - name: NWERC 2024
+      url: https://2024.nwerc.example/contest
+    - SWERC 2024
+    - name: SEERC 2024
+eq:
+  source:
+    - name: NWERC 2024
+      url: https://2024.nwerc.example/contest
+    - name: SWERC 2024
+      url: ~
+    - name: SEERC 2024
+      url: ~
+
+---
+# License tests
+yaml:
+  problem_format_version: 2023-07-draft
+  name: Default license is unknown and rights-less
+eq:
+  license: unknown
+  rights_owner:
+---
+yaml:
+  problem_format_version: 2023-07-draft
+  name: Rights-less license
+  license: public domain
+eq:
+  license: public domain
+  rights_owner:
+---
+yaml:
+  problem_format_version: 2023-07-draft
+  name: Specify license and rights owner
+  license: cc0
+  rights_owner: Bob
+---
+yaml:
+  problem_format_version: 2023-07-draft
+  name: Don't need rights_owner if credits are given
+  license: cc0
+  credits: Bob
+eq:
+  license: cc0
+  rights_owner: # Allowed to be empty
+---
+yaml:
+  problem_format_version: 2023-07-draft
+  name: Don't need rights_owner if credits.authors are given
+  license: cc0
+  credits:
+    authors: "Bob"
+---
+yaml:
+  problem_format_version: 2023-07-draft
+  name: Don't need rights_owner if source is given
+  license: cc0
+  source: NWERC 2024
+
+---
+# Embargo tests
+yaml:
+  problem_format_version: 2023-07-draft
+  name: Embargo date
+  embargo_until: 2025-12-31
+---
+yaml:
+  problem_format_version: 2023-07-draft
+  name: Embargo datetime
+  embargo_until: 2025-12-31T23:59:59