diff --git a/Lib/test/test_peg_generator/test_pegen.py b/Lib/test/test_peg_generator/test_pegen.py
index d912c55812397d..0387b9395611b0 100644
--- a/Lib/test/test_peg_generator/test_pegen.py
+++ b/Lib/test/test_peg_generator/test_pegen.py
@@ -1106,3 +1106,52 @@ def test_deep_nested_rule(self) -> None:
         )
 
         self.assertEqual(output, expected_output)
+
+    def test_rule_flags(self) -> None:
+        """Test the new rule flags syntax that accepts arbitrary lists of flags."""
+        # Test grammar with various flag combinations
+        grammar_source = """
+        start: simple_rule
+
+        simple_rule (memo):
+            | "hello"
+
+        multi_flag_rule (memo, custom, test):
+            | "world"
+
+        single_custom_flag (custom):
+            | "test"
+
+        no_flags_rule:
+            | "plain"
+        """
+
+        grammar: Grammar = parse_string(grammar_source, GrammarParser)
+        rules = grammar.rules
+
+        # Rule keeps its flags in the `flags` frozenset and has no separate
+        # `memo` attribute, so memoization is checked via 'memo' membership.
+
+        # Test memo-only rule
+        simple_rule = rules['simple_rule']
+        self.assertIn('memo', simple_rule.flags, "simple_rule should have the memo flag")
+        self.assertEqual(simple_rule.flags, frozenset(['memo']),
+                         f"simple_rule flags should be {{'memo'}}, got {simple_rule.flags}")
+
+        # Test multi-flag rule
+        multi_flag_rule = rules['multi_flag_rule']
+        self.assertIn('memo', multi_flag_rule.flags, "multi_flag_rule should have the memo flag")
+        self.assertEqual(multi_flag_rule.flags, frozenset({'memo', 'custom', 'test'}),
+                         f"multi_flag_rule flags should contain memo, custom, test, got {multi_flag_rule.flags}")
+
+        # Test single custom flag rule
+        single_custom_rule = rules['single_custom_flag']
+        self.assertNotIn('memo', single_custom_rule.flags, "single_custom_flag should not have the memo flag")
+        self.assertEqual(single_custom_rule.flags, frozenset(['custom']),
+                         f"single_custom_flag flags should be {{'custom'}}, got {single_custom_rule.flags}")
+
+        # Test no flags rule
+        no_flags_rule = rules['no_flags_rule']
+        self.assertNotIn('memo', no_flags_rule.flags, "no_flags_rule should not have the memo flag")
+        self.assertEqual(no_flags_rule.flags, frozenset(),
+                         f"no_flags_rule flags should be the empty set, got {no_flags_rule.flags}")
diff --git a/Tools/peg_generator/pegen/c_generator.py b/Tools/peg_generator/pegen/c_generator.py
index fa75174ea0d59d..ffa73a64f21cfe 100644
--- a/Tools/peg_generator/pegen/c_generator.py
+++ b/Tools/peg_generator/pegen/c_generator.py
@@ -595,7 +595,7 @@ def _set_up_rule_memoization(self, node: Rule, result_type: str) -> None:
         self.print(f"{node.name}_raw(Parser *p)")
 
     def _should_memoize(self, node: Rule) -> bool:
-        return node.memo and not node.left_recursive
+        return "memo" in node.flags and not node.left_recursive
 
     def _handle_default_rule_body(self, node: Rule, rhs: Rhs, result_type: str) -> None:
         memoize = self._should_memoize(node)
diff --git a/Tools/peg_generator/pegen/grammar.py b/Tools/peg_generator/pegen/grammar.py
index cca8584a632071..d3c2eca6615a9f 100644
--- a/Tools/peg_generator/pegen/grammar.py
+++ b/Tools/peg_generator/pegen/grammar.py
@@ -58,11 +58,11 @@ def __iter__(self) -> Iterator[Rule]:
 
 
 class Rule:
-    def __init__(self, name: str, type: str | None, rhs: Rhs, memo: object | None = None):
+    def __init__(self, name: str, type: str | None, rhs: Rhs, flags: frozenset[str] | None = None):
         self.name = name
         self.type = type
         self.rhs = rhs
-        self.memo = bool(memo)
+        self.flags = flags or frozenset()
         self.left_recursive = False
         self.leader = False
 
@@ -135,7 +135,6 @@ def __repr__(self) -> str:
 class Rhs:
     def __init__(self, alts: list[Alt]):
         self.alts = alts
-        self.memo: tuple[str | None, str] | None = None
 
     def __str__(self) -> str:
         return " | ".join(str(alt) for alt in self.alts)
@@ -263,7 +262,6 @@ class Repeat:
 
     def __init__(self, node: Plain):
         self.node = node
-        self.memo: tuple[str | None, str] | None = None
 
     def __iter__(self) -> Iterator[Plain]:
         yield self.node
diff --git a/Tools/peg_generator/pegen/grammar_parser.py b/Tools/peg_generator/pegen/grammar_parser.py
index 2e3a607f7209b0..4fa2739270773f 100644
--- a/Tools/peg_generator/pegen/grammar_parser.py
+++ b/Tools/peg_generator/pegen/grammar_parser.py
@@ -147,12 +147,12 @@ def rules(self) -> Optional[RuleList]:
 
     @memoize
     def rule(self) -> Optional[Rule]:
-        # rule: rulename memoflag? ":" alts NEWLINE INDENT more_alts DEDENT | rulename memoflag? ":" NEWLINE INDENT more_alts DEDENT | rulename memoflag? ":" alts NEWLINE
+        # rule: rulename flags? ":" alts NEWLINE INDENT more_alts DEDENT | rulename flags? ":" NEWLINE INDENT more_alts DEDENT | rulename flags? ":" alts NEWLINE
         mark = self._mark()
         if (
             (rulename := self.rulename())
             and
-            (opt := self.memoflag(),)
+            (flags := self.flags(),)
             and
             (literal := self.expect(":"))
             and
@@ -166,12 +166,12 @@ def rule(self) -> Optional[Rule]:
             and
             (_dedent := self.expect('DEDENT'))
         ):
-            return Rule ( rulename [0] , rulename [1] , Rhs ( alts . alts + more_alts . alts ) , memo = opt )
+            return Rule ( rulename [0] , rulename [1] , Rhs ( alts . alts + more_alts . alts ) , flags = flags )
         self._reset(mark)
         if (
             (rulename := self.rulename())
             and
-            (opt := self.memoflag(),)
+            (flags := self.flags(),)
             and
             (literal := self.expect(":"))
             and
@@ -183,12 +183,12 @@ def rule(self) -> Optional[Rule]:
             and
             (_dedent := self.expect('DEDENT'))
         ):
-            return Rule ( rulename [0] , rulename [1] , more_alts , memo = opt )
+            return Rule ( rulename [0] , rulename [1] , more_alts , flags = flags )
         self._reset(mark)
         if (
             (rulename := self.rulename())
             and
-            (opt := self.memoflag(),)
+            (flags := self.flags(),)
             and
             (literal := self.expect(":"))
             and
@@ -196,7 +196,7 @@ def rule(self) -> Optional[Rule]:
             and
             (_newline := self.expect('NEWLINE'))
         ):
-            return Rule ( rulename [0] , rulename [1] , alts , memo = opt )
+            return Rule ( rulename [0] , rulename [1] , alts , flags = flags )
         self._reset(mark)
         return None
 
@@ -219,17 +219,28 @@ def rulename(self) -> Optional[RuleName]:
         return None
 
     @memoize
-    def memoflag(self) -> Optional[str]:
-        # memoflag: '(' "memo" ')'
+    def flags(self) -> Optional[frozenset [str]]:
+        # flags: '(' ','.flag+ ')'
         mark = self._mark()
         if (
             (literal := self.expect('('))
             and
-            (literal_1 := self.expect("memo"))
+            (a := self._gather_2())
             and
-            (literal_2 := self.expect(')'))
+            (literal_1 := self.expect(')'))
+        ):
+            return frozenset ( a )
+        self._reset(mark)
+        return None
+
+    @memoize
+    def flag(self) -> Optional[str]:
+        # flag: NAME
+        mark = self._mark()
+        if (
+            (name := self.name())
         ):
-            return "memo"
+            return name . string
         self._reset(mark)
         return None
 
@@ -661,8 +672,38 @@ def target_atom(self) -> Optional[str]:
         self._reset(mark)
         return None
 
+    @memoize
+    def _loop0_1(self) -> Optional[Any]:
+        # _loop0_1: ',' flag
+        mark = self._mark()
+        children = []
+        while (
+            (literal := self.expect(','))
+            and
+            (elem := self.flag())
+        ):
+            children.append(elem)
+            mark = self._mark()
+        self._reset(mark)
+        return children
+
+    @memoize
+    def _gather_2(self) -> Optional[Any]:
+        # _gather_2: flag _loop0_1
+        mark = self._mark()
+        if (
+            (elem := self.flag())
+            is not None
+            and
+            (seq := self._loop0_1())
+            is not None
+        ):
+            return [elem] + seq
+        self._reset(mark)
+        return None
+
     KEYWORDS = ()
-    SOFT_KEYWORDS = ('memo',)
+    SOFT_KEYWORDS = ()
 
 
 if __name__ == '__main__':
diff --git a/Tools/peg_generator/pegen/metagrammar.gram b/Tools/peg_generator/pegen/metagrammar.gram
index f484c4781823bc..cae91ab9c4165b 100644
--- a/Tools/peg_generator/pegen/metagrammar.gram
+++ b/Tools/peg_generator/pegen/metagrammar.gram
@@ -50,19 +50,21 @@ rules[RuleList]:
     | rule { [rule] }
 
 rule[Rule]:
-    | rulename memoflag? ":" alts NEWLINE INDENT more_alts DEDENT {
-          Rule(rulename[0], rulename[1], Rhs(alts.alts + more_alts.alts), memo=opt) }
-    | rulename memoflag? ":" NEWLINE INDENT more_alts DEDENT {
-          Rule(rulename[0], rulename[1], more_alts, memo=opt) }
-    | rulename memoflag? ":" alts NEWLINE { Rule(rulename[0], rulename[1], alts, memo=opt) }
+    | rulename flags=flags? ":" alts NEWLINE INDENT more_alts DEDENT {
+          Rule(rulename[0], rulename[1], Rhs(alts.alts + more_alts.alts), flags=flags) }
+    | rulename flags=flags? ":" NEWLINE INDENT more_alts DEDENT {
+          Rule(rulename[0], rulename[1], more_alts, flags=flags) }
+    | rulename flags=flags? ":" alts NEWLINE { Rule(rulename[0], rulename[1], alts, flags=flags) }
 
 rulename[RuleName]:
     | NAME annotation { (name.string, annotation) }
     | NAME { (name.string, None) }
 
-# In the future this may return something more complicated
-memoflag[str]:
-    | '(' "memo" ')' { "memo" }
+flags[frozenset[str]]:
+    | '(' a=','.flag+ ')' { frozenset(a) }
+
+flag[str]:
+    | NAME { name.string }
 
 alts[Rhs]:
     | alt "|" alts { Rhs([alt] + alts.alts)}