From 83877967fc38fef31f2c8751534caf531fdb8531 Mon Sep 17 00:00:00 2001 From: mzuenni Date: Mon, 17 Nov 2025 17:57:00 +0100 Subject: [PATCH 1/4] implement match for generate.yaml --- bin/generate.py | 55 +++++++++++++++++-- bin/util.py | 13 ++++- .../generators/generators.yaml | 13 ++++- test/test_problems.py | 3 + 4 files changed, 74 insertions(+), 10 deletions(-) diff --git a/bin/generate.py b/bin/generate.py index 39cfda18..dd09dda8 100644 --- a/bin/generate.py +++ b/bin/generate.py @@ -84,6 +84,7 @@ def assert_type( "generate", "retries", "count", + "match", *(e[1:] for e in config.KNOWN_TEXT_DATA_EXTENSIONS), ) @@ -327,13 +328,9 @@ def default_solution_path(generator_config: "GeneratorConfig") -> Path: KNOWN_TESTCASE_KEYS: Final[Sequence[str]] = ( "type", - "generate", - "copy", "solution", "random_salt", - "retries", - "count", - *(e[1:] for e in config.KNOWN_TEXT_DATA_EXTENSIONS), + *UNIQUE_TESTCASE_KEYS, ) RESERVED_TESTCASE_KEYS: Final[Sequence[str]] = ("data", "test_group.yaml", "include") KNOWN_DIRECTORY_KEYS: Final[Sequence[str]] = ( @@ -471,7 +468,9 @@ def __init__( # This variable already includes the .in extension, so `.with_suffix()` works nicely. self.copy = None # 3. Hardcoded cases where the source is in the yaml file itself. - self.hardcoded = {} + self.hardcoded = dict[str, str]() + # list of patterns used to check the generated testcase.in + self.patterns = list[re.Pattern[str]]() # Hash of testcase for caching. 
self.hash: str @@ -623,6 +622,20 @@ def __init__( for ext, value in self.hardcoded.items(): hashes[ext] = hash_string(value) + if "match" in yaml: + match_entries = yaml["match"] + assert_type("`match`", match_entries, (list, str)) + if isinstance(match_entries, str): + match_entries = [match_entries] + assert isinstance(match_entries, list) + + for i, match_entry in enumerate(match_entries): + assert_type(f"`match[{i}]`", match_entry, str) + try: + self.patterns.append(re.compile(match_entry, re.MULTILINE | re.DOTALL)) + except re.error: + raise ParseException(f"could not parse regex `match[{i}]`.") + # Warn/Error for unknown keys. for any_key in yaml: if any_key in RESERVED_TESTCASE_KEYS: @@ -670,6 +683,7 @@ def get(key: str, default: T) -> T: self.rule_hashes: dict[object, object] = get("rule_hashes", {}) self.generated_extensions: list[object] = get("generated_extensions", []) self.input_validator_hashes: dict[object, object] = get("input_validator_hashes", {}) + self.matches: dict[object, object] = get("matches", {}) self.solution_hash: dict[object, object] = get("solution_hash", {}) self.interactor_hash: dict[object, object] = get("interactor_hash", {}) self.ans_out_validator_hashes: dict[object, object] = get( @@ -979,6 +993,31 @@ def generate_from_rule() -> bool: assert t._has_required_in(infile), f"Failed to generate in file: {infile.name}" return True + def check_match(testcase: Testcase, bar: ProgressBar) -> None: + nonlocal meta_yaml + + def get_pattern_str(pattern: re.Pattern[str]) -> str: + return pattern.pattern.encode("unicode_escape").decode() + + if all(meta_yaml.matches.get(get_pattern_str(p)) for p in t.patterns): + return + + updated = False + text = testcase.in_path.read_text() + for pattern in t.patterns: + if meta_yaml.matches.get(pattern.pattern): + continue + match = pattern.search(text) + if match: + match_str = f"[{match.start()}, {match.end()})" + bar.debug(f"Found match for '{get_pattern_str(pattern)}'': {match_str}") + 
meta_yaml.matches[pattern.pattern] = match_str + updated = True + else: + bar.warn(f"Found not match for '{get_pattern_str(pattern)}'") + if updated: + meta_yaml.write() + def generate_from_solution(testcase: Testcase, bar: ProgressBar) -> bool: nonlocal meta_yaml @@ -1278,6 +1317,10 @@ def add_test_case_to_cache() -> None: if not t.validate_in(problem, testcase, meta_yaml, bar): return + # Step 3.1: check patterns + # this is not a hard error since the testcase is still valid + check_match(testcase, bar) + # Step 4: generate .ans and .interaction if needed if not generate_from_solution(testcase, bar): return diff --git a/bin/util.py b/bin/util.py index 2add520c..ce36e234 100644 --- a/bin/util.py +++ b/bin/util.py @@ -226,7 +226,7 @@ def __init__( assert not (items and (max_len or count)) assert items is not None or max_len - if items is not None: + if items is not None and max_len is None: max_len = max((ProgressBar.item_len(x) for x in items), default=0) assert max_len is not None self.prefix: str = prefix # The prefix to always print @@ -575,7 +575,11 @@ def __init__( item: Optional[ITEM_TYPE] = None, ) -> None: self.prefix = str(prefix) if prefix else None - self.item_width = max_len + 1 if max_len is not None else None + self.item_width = None + if item is not None: + self.item_width = ProgressBar.item_len(item) + 1 + if max_len is not None: + self.item_width = max_len + 1 self.item = item def start(self, item: Optional[ITEM_TYPE] = None) -> "PrintBar": @@ -742,6 +746,11 @@ def parse_yaml( fatal(f"Duplicate key in yaml file {path}!\n{error.args[0]}\n{error.args[2]}") else: fatal(f"Duplicate key in yaml object!\n{str(error)}") + except Exception as e: + if suppress_errors: + return None + eprint(f"{Fore.YELLOW}{e}{Style.RESET_ALL}", end="") + fatal(f"Failed to parse {path}.") return ret else: diff --git a/test/problems/alternativeencryption/generators/generators.yaml b/test/problems/alternativeencryption/generators/generators.yaml index c63dc032..f42cb693 
100644 --- a/test/problems/alternativeencryption/generators/generators.yaml +++ b/test/problems/alternativeencryption/generators/generators.yaml @@ -21,8 +21,17 @@ data: encrypt 1 a - - letters: eval.py 0 26 sigma[i % 26] - - letters: eval.py 0 1000 sigma[i % 26] + - letters: + generate: eval.py 0 26 sigma[i % 26] + match: + - "\\Aencrypt$" + - "^a$" + - "^c$" + - "^z$" + - "\\A(?!.*^[A-Z]$).*\\Z" + - letters: + generate: eval.py 0 1000 sigma[i % 26] + match: "^a$.*^z$" - random_equal: eval.py {seed} 1000 sigma[i % 26] * randrange(1, 101) - max_equal: eval.py 0 1000 sigma[i % 26] * 100 - random2: eval.py {seed} 1000 randstr(2) diff --git a/test/test_problems.py b/test/test_problems.py index c7108fc3..dfdbab47 100644 --- a/test/test_problems.py +++ b/test/test_problems.py @@ -55,6 +55,9 @@ def setup_alternativeencryption_problem(request): @pytest.mark.usefixtures("setup_alternativeencryption_problem") class TestAlternativeencryptionProblem: + def test_generate(self): + tools.test(["generate"]) + def test_check_testing_tool(self): tools.test(["check_testing_tool"]) From a4940d196a091670ecdfaae81374dd3ea0be88f7 Mon Sep 17 00:00:00 2001 From: mzuenni Date: Mon, 17 Nov 2025 18:33:59 +0100 Subject: [PATCH 2/4] retries are not unique to testcases? 
--- bin/generate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/generate.py b/bin/generate.py index dd09dda8..c28a4f69 100644 --- a/bin/generate.py +++ b/bin/generate.py @@ -82,7 +82,6 @@ def assert_type( UNIQUE_TESTCASE_KEYS: Final[Sequence[str]] = ( "copy", "generate", - "retries", "count", "match", *(e[1:] for e in config.KNOWN_TEXT_DATA_EXTENSIONS), @@ -330,6 +329,7 @@ def default_solution_path(generator_config: "GeneratorConfig") -> Path: "type", "solution", "random_salt", + "retries", *UNIQUE_TESTCASE_KEYS, ) RESERVED_TESTCASE_KEYS: Final[Sequence[str]] = ("data", "test_group.yaml", "include") From fc901906699e9602fcdd1ab5c20d0aa8fa0ee86b Mon Sep 17 00:00:00 2001 From: mzuenni Date: Mon, 17 Nov 2025 23:18:29 +0100 Subject: [PATCH 3/4] update doc --- bin/generate.py | 12 +++++------- doc/generators.md | 6 ++++-- doc/generators.yaml | 1 + 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/bin/generate.py b/bin/generate.py index c28a4f69..72ab9276 100644 --- a/bin/generate.py +++ b/bin/generate.py @@ -332,19 +332,17 @@ def default_solution_path(generator_config: "GeneratorConfig") -> Path: "retries", *UNIQUE_TESTCASE_KEYS, ) -RESERVED_TESTCASE_KEYS: Final[Sequence[str]] = ("data", "test_group.yaml", "include") +UNIQUE_DIRECTORY_KEYS: Final[Sequence[str]] = ("data", "test_group.yaml", "include") KNOWN_DIRECTORY_KEYS: Final[Sequence[str]] = ( "type", - "data", - "test_group.yaml", - "include", "solution", "random_salt", "retries", + *UNIQUE_DIRECTORY_KEYS, ) RESERVED_DIRECTORY_KEYS: Final[Sequence[str]] = ("command",) -KNOWN_ROOT_KEYS: Final[Sequence[str]] = ("generators", "parallel", "version") -DEPRECATED_ROOT_KEYS: Final[Sequence[str]] = ("gitignore_generated", "visualizer") +KNOWN_ROOT_KEYS: Final[Sequence[str]] = ("generators", "version") +DEPRECATED_ROOT_KEYS: Final[Sequence[str]] = ("gitignore_generated", "parallel", "visualizer") # Holds all inheritable configuration options. 
Currently: @@ -638,7 +636,7 @@ def __init__( # Warn/Error for unknown keys. for any_key in yaml: - if any_key in RESERVED_TESTCASE_KEYS: + if any_key in UNIQUE_DIRECTORY_KEYS: raise ParseException(f"Testcase must not contain reserved key {any_key}.") if any_key not in KNOWN_TESTCASE_KEYS: if config.args.action == "generate": diff --git a/doc/generators.md b/doc/generators.md index 60e627b8..9e55053d 100644 --- a/doc/generators.md +++ b/doc/generators.md @@ -29,10 +29,11 @@ The two main object types are `directory` and `generator`. The root of `generato - `test_group.yaml`: Optional yaml configuration that will be copied to `test_group.yaml` in this directory. - `solution`: Optional invocation of a solution to be used to generate `.ans` files. Set to empty to disable generating `.ans`. (Useful for e.g. the `data/samples/` directory.) This must be an absolute path relative to the problem root. - `random_salt`: Optional string that will be prepended to each command before computing its `{seed}`. May be used to regenerate all random cases and to prevent predictable seeds. +- `retries`: Optional int that specifies the maximum number of invocations that will be tried if the generator fails. Each invocation will use a different value for `{seed}`. - `data`: The test cases / test groups contained in this directory. This may take two forms: - A dictionary, each key is the name of a test case/test group, and each value must be a `directory` or `generator` object. - A list of dictionaries as above. In this case, testcases will be prefixed with zero padded 1-based integers in the order of the list. Items in the same dictionary will get the same number. -- `input`: Optional list of Directory object names (as strings) e.g. `- "sample"`. All testcases from those directories are linked for this directory. +- `include`: Optional list of Directory object names (as strings) e.g. `- "sample"`. All testcases from those directories are linked for this directory. 
**Generator objects** have the following forms: @@ -41,7 +42,8 @@ The two main object types are `directory` and `generator`. The root of `generato - `: `: A file with extension `ext` and the `content` will be generated. `` must be a known file extension. - `count: `. To generate multiple Generator objects. If `generate` is used and `{seed}` or `{seed:(0-9)+}` is present all Generator objects will use a different seed. The arguments of `generate` may contain `{count}` to refer to the index of this generator invocation. Or as a shorthand: -- `command` followed by the command as for `generate`. +- `match`: Optional `str` or list of `str`. Each entry should be a regex pattern. If the generated testcase does not match a pattern, a warning will be shown. +- `solution`, `random_salt`, and `retries`: see **Directory objects**. The follwoing things should hold: - A `.in` file must be specified/generated by this diff --git a/doc/generators.yaml b/doc/generators.yaml index a38be444..3918b2f1 100644 --- a/doc/generators.yaml +++ b/doc/generators.yaml @@ -117,6 +117,7 @@ data: "04": tree 5 # keys from the global generators: dictionary may also be used. "05": generate: tree 6 # same as above, but with different argument + match: "^1 2$" # check that there is an edge from 1 to 2 (can also be a list with multiple patterns) # Arguments are split on white space: this will pass two arguments: `"a` and `b"`, so probably not what is intended. 06-string: tree "a b" From 5011ab1d0a1891095a3db0909237d36c40f2e4d5 Mon Sep 17 00:00:00 2001 From: mzuenni Date: Wed, 19 Nov 2025 17:22:09 +0100 Subject: [PATCH 4/4] update schema? 
--- support/schemas/generators.cue | 1 + support/schemas/generators_yaml_schema.json | 15 ++++++++++++++ .../invalid_yaml/invalid.generators.yaml | 20 +++++++++++++++++++ .../valid_yaml/rich-generators.yaml | 9 +++++++++ 4 files changed, 45 insertions(+) diff --git a/support/schemas/generators.cue b/support/schemas/generators.cue index bd94e0da..28531a99 100644 --- a/support/schemas/generators.cue +++ b/support/schemas/generators.cue @@ -40,6 +40,7 @@ import "strings" // The "copy" key uses a path relative to "/generators/" ending in a test case name, // such as "manual/samples/3". copy?: #dirpath + match?: string | [...string] ["in" | "in.statement" | "in.download" | "ans" | "ans.statement" | "ans.download" | diff --git a/support/schemas/generators_yaml_schema.json b/support/schemas/generators_yaml_schema.json index 9a5d2a85..9eabefa1 100644 --- a/support/schemas/generators_yaml_schema.json +++ b/support/schemas/generators_yaml_schema.json @@ -250,6 +250,21 @@ "title": "Count", "description": "Generate this number of test cases, substituting `{count}` in the `generate:` command with values between 1 and `count`, inclusive." }, + "match": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ], + "title": "Match", + "description": "Regular expression(s) that are searched in the test case. Each regular expression that is not found results in a warning." 
+ }, "in": { "type": "string", "title": "Input", diff --git a/test/yaml/generators/invalid_yaml/invalid.generators.yaml b/test/yaml/generators/invalid_yaml/invalid.generators.yaml index 6fc27a90..fe2f0b75 100644 --- a/test/yaml/generators/invalid_yaml/invalid.generators.yaml +++ b/test/yaml/generators/invalid_yaml/invalid.generators.yaml @@ -381,6 +381,26 @@ data: generate: my_generator {count} count: 101 --- +# match must be a string or list of strings +data: + sample: {} + secret: + data: + - '': + in: "1 2" + match: 1 +--- +# match must be a string or list of strings +data: + sample: {} + secret: + data: + - '': + in: "1 2" + match: + - "1" + - 2 +--- # No test_group.yaml on testcase level # TODO Not picked up by JSON schema data: diff --git a/test/yaml/generators/valid_yaml/rich-generators.yaml b/test/yaml/generators/valid_yaml/rich-generators.yaml index 26d62cbb..7097268e 100644 --- a/test/yaml/generators/valid_yaml/rich-generators.yaml +++ b/test/yaml/generators/valid_yaml/rich-generators.yaml @@ -28,6 +28,15 @@ data: 'morecurlies': generate: my_generator {seed:1} --name {name} --ctr {count} --arg {count} count: 5 + 'match': + in: "1 2" + match: "1" + 'morematch': + in: "1 2 3" + match: + - "1" + - "2" + - "3" 'group_with_test_group_yaml': test_group.yaml: input_validator_args: [--connected, --max_n, "2000"]