Skip to content

Commit b6de7ad

Browse files
committed
Defines parser module
1 parent c2b63f5 commit b6de7ad

File tree

2 files changed

+244
-0
lines changed

2 files changed

+244
-0
lines changed

src/syntactes/parser/parser.py

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
from collections import deque
2+
from typing import Iterable
3+
4+
from syntactes import Token
5+
from syntactes._action import Action, ActionType
6+
from syntactes._state import LR0State
7+
from syntactes.parser import (
8+
ExecutablesRegistry,
9+
NotAcceptedError,
10+
ParserError,
11+
UnexpectedTokenError,
12+
)
13+
from syntactes.table import LR0ParsingTable, SLRParsingTable
14+
15+
16+
class LR0Parser:
    """
    Parses streams of tokens based on the configured parsing table.

    The parser keeps three work queues: the stack of tokens/symbols seen so
    far, the stack of states visited, and the remaining input. All three are
    reset at the start of every `parse` call and whenever a parser error is
    raised, so a single instance can be reused for several streams.
    """

    def __init__(self, table: LR0ParsingTable) -> None:
        self._table = table
        self._token_stack: deque[Token] = deque()
        self._state_stack: deque[LR0State] = deque()
        self._token_stream: deque[Token] = deque()

    def parse(self, stream: Iterable[Token]) -> None:
        """
        Parses the given stream of tokens. Expects the EOF token as the last one.

        Raises `syntactes.parser.UnexpectedTokenError` if an unexpected token is
        received.

        Raises `syntactes.parser.NotAcceptedError` if the stream of tokens has
        been parsed and the parser did not receive an accept action. An empty
        stream is reported the same way, since it cannot end with EOF.
        """
        # Discard whatever a previous (successful or interrupted) parse left
        # behind, otherwise states would pile up across calls.
        self._cleanup()
        self._set_state(self._table.initial_state)
        self._token_stream.extend(stream)

        # Stays None when the stream is empty, so the EOF check below does not
        # touch an unbound name.
        token = None
        while self._token_stream:
            token = self._token_stream.popleft()
            self._apply_action(token, self._get_action(token))

        if token is None or token != Token.eof():
            self._raise(NotAcceptedError("Expected EOF token. "))

        if not self._get_state().is_final:
            self._raise(self._unexpected_token_error(Token.eof()))

    def _apply_action(self, token: Token, action: Action) -> None:
        """
        Applies a single table action for the given lookahead token.

        SHIFT pushes the token and the target state. REDUCE replaces the rule's
        right-hand side on the token stack with its left-hand side, runs the
        executable registered for the rule and then follows the action the
        table gives for the left-hand side symbol. Any other action type is a
        no-op here.
        """
        if action.action_type == ActionType.SHIFT:
            self._token_stack.append(token)
            self._set_state(action.actionable)
        elif action.action_type == ActionType.REDUCE:
            rule = action.actionable

            # Popping yields the right-hand side symbols right-to-left; the
            # executable receives them in that (reversed) order.
            args = [self._token_stack.pop() for _ in rule.rhs]
            self._token_stack.append(rule.lhs)

            # One state was pushed per right-hand side symbol; unwind them.
            for _ in rule.rhs:
                self._state_stack.pop()

            executable = ExecutablesRegistry.get(rule)
            executable(*args)

            # Reduce actions do not consume the lookahead token, so put it back.
            self._token_stream.appendleft(token)

            shift = self._get_action(rule.lhs)
            self._set_state(shift.actionable)

    def _get_action(self, token: Token) -> Action:
        """
        Returns the action for the current state and the given token.

        Raises `syntactes.parser.UnexpectedTokenError` if the table holds no
        action for that pair.
        """
        actions = self._table.get_actions(self._get_state(), token)
        # An empty collection is treated like a missing entry: there is
        # nothing for `_resolve_conflict` to pick from.
        if not actions:
            self._raise(self._unexpected_token_error(token))

        return self._resolve_conflict(actions)

    def _resolve_conflict(self, actions: list[Action]) -> Action:
        """Picks one action when the table offers several: always the first."""
        return actions[0]

    def _unexpected_token_error(self, token: Token) -> UnexpectedTokenError:
        """
        Builds the error for `token`, listing the tokens the current state has
        actions for. Must be called before `_raise`, which clears the state.
        """
        actions = self._table.get(self._get_state())
        expected_tokens = [] if actions is None else list(actions.keys())
        return UnexpectedTokenError(token, expected_tokens)

    def _set_state(self, state: LR0State) -> None:
        self._state_stack.append(state)

    def _get_state(self) -> LR0State:
        return self._state_stack[-1]

    def _cleanup(self) -> None:
        """Empties the token stack, the state stack and the pending input."""
        self._token_stack.clear()
        self._state_stack.clear()
        self._token_stream.clear()

    def _raise(self, error: ParserError) -> None:
        """Resets the parser and raises `error` without exception chaining."""
        self._cleanup()
        raise error from None
98+
99+
100+
class SLRParser(LR0Parser):
    """
    Parser driven by an SLR parsing table.

    Defines no behaviour of its own: construction and parsing are inherited
    unchanged from `LR0Parser`.
    """

src/syntactes/tests/test_parser.py

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
from unittest_extensions import TestCase, args
2+
3+
from syntactes import Token
4+
from syntactes.parser import (
5+
ExecutablesRegistry,
6+
LR0Parser,
7+
ParserError,
8+
SLRParser,
9+
execute_on,
10+
)
11+
from syntactes.tests.data import (
12+
EOF,
13+
PLUS,
14+
lr0_parsing_table,
15+
rule_2,
16+
rule_4,
17+
slr_parsing_table,
18+
x,
19+
)
20+
21+
# Two "x" tokens that differ only in their last constructor argument (1 and 2);
# the token-value tests below add up the `.value` of the tokens they receive.
x1, x2 = Token("x", True, 1), Token("x", True, 2)
23+
24+
25+
class TestLR0Parser(TestCase):
    """Common fixture for the LR0Parser tests: one fresh parser per test."""

    def setUp(self):
        # Built from the LR(0) fixture table so tests never share parser state.
        self._parser = LR0Parser(lr0_parsing_table())

    def parser(self):
        """Return the parser created in `setUp`."""
        return self._parser

    def assert_parser_error(self):
        """Assert that evaluating the test subject raises a `ParserError`."""
        self.assertResultRaises(ParserError)
34+
35+
36+
class TestLR0ParserParse(TestLR0Parser):
    """Checks which token streams `LR0Parser.parse` accepts and rejects."""

    def subject(self, *stream):
        # The positional test arguments form the token stream.
        lr0_parser = self.parser()
        return lr0_parser.parse(stream)

    # --- streams that must parse without raising ---

    @args(x, EOF)
    def test_simple_x(self):
        self.result()

    @args(x, PLUS, x, EOF)
    def test_x_plus_x(self):
        self.result()

    # --- streams that must be rejected with a ParserError ---

    @args(x)
    def test_no_eof_raises(self):
        self.assert_parser_error()

    @args(x, x)
    def test_x_x_raises(self):
        self.assert_parser_error()

    @args(x, PLUS)
    def test_x_plus_raises(self):
        self.assert_parser_error()

    @args(x, PLUS, EOF)
    def test_x_plus_eof_raises(self):
        self.assert_parser_error()

    @args(EOF)
    def test_eof_raises(self):
        self.assert_parser_error()
67+
68+
69+
class TestLR0ParserParseExecutables(TestLR0Parser):
    """Checks that the executable registered for a rule runs once per reduction."""

    def subject(self, *stream):
        # Parse the stream, then expose how many times `add` was invoked.
        self.parser().parse(stream)
        return self.sum

    def add(self, _right, _plus, _left):
        # The parser hands over the reduced symbols right-to-left; only the
        # number of invocations matters here.
        self.sum += 1

    def setUp(self):
        self.sum = 0
        self.add = execute_on(rule_2)(self.add)
        super().setUp()

    def tearDown(self):
        # Unregister the executable so it cannot leak into other tests and
        # keep mutating this (finished) test instance.
        ExecutablesRegistry.clear()

    @args(x, PLUS, x, EOF)
    def test_x_plus_x(self):
        self.assertResult(1)

    @args(x, PLUS, x, PLUS, x, EOF)
    def test_x_plus_x_plus_x(self):
        self.assertResult(2)
89+
90+
91+
class TestLR0ParserParseExecutablesTokenValues(TestLR0Parser):
    """Checks that executables receive the actual tokens, values included."""

    def setUp(self):
        self.sum = 0
        # Register `add` for rule_4; the returned callable is not needed.
        execute_on(rule_4)(self.add)
        super().setUp()

    def tearDown(self):
        # Remove the registration so other tests start from an empty registry.
        ExecutablesRegistry.clear()

    def subject(self, *stream):
        # Parse the stream, then report the accumulated token values.
        lr0_parser = self.parser()
        lr0_parser.parse(stream)
        return self.sum

    def add(self, token):
        # Invoked positionally by the parser with the reduced token.
        self.sum += token.value

    @args(x1, PLUS, x1, EOF)
    def test_x1_plus_x1(self):
        self.assertResult(2)

    @args(x1, PLUS, x2, EOF)
    def test_x1_plus_x2(self):
        self.assertResult(3)

    @args(x2, PLUS, x2, EOF)
    def test_x2_plus_x2(self):
        self.assertResult(4)
118+
119+
120+
class TestSLRParser(TestCase):
    """Common fixture for the SLRParser tests: one fresh parser per test."""

    def setUp(self):
        # Built from the SLR fixture table so tests never share parser state.
        self._parser = SLRParser(slr_parsing_table())

    def parser(self):
        """Return the parser created in `setUp`."""
        return self._parser

    def assert_parser_error(self):
        """Assert that evaluating the test subject raises a `ParserError`."""
        self.assertResultRaises(ParserError)
129+
130+
131+
class TestSLRParserParse(TestSLRParser):
    """Checks which token streams `SLRParser.parse` accepts and rejects."""

    def subject(self, *stream):
        # The positional test arguments form the token stream.
        slr_parser = self.parser()
        return slr_parser.parse(stream)

    @args(x, x, EOF)
    def test_x_x_eof_raises(self):
        # Two consecutive "x" tokens must be rejected.
        self.assert_parser_error()

    @args(x, PLUS, x, EOF)
    def test_x_plus_x(self):
        # A well-formed sum must parse without raising.
        self.result()

0 commit comments

Comments
 (0)