Skip to content

Commit 3915c38

Browse files
committed
Merge branch 'parser-module'
2 parents bfc52c7 + 361da74 commit 3915c38

File tree

18 files changed

+663
-103
lines changed

18 files changed

+663
-103
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
clean:
2-
rm -rf src/syntactes/__pycache__ src/syntactes/tests/__pycache__
2+
rm -rf src/syntactes/__pycache__ src/syntactes/tests/__pycache__ src/syntactes/parser/__pycache__
33
rm -rf dist src/syntactes.egg-info
44

55
test:

README.md

Lines changed: 78 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
Python parser generator
99

1010
## Quick start
11+
12+
### Creating a parsing table
1113
```py
1214
from syntactes import Grammar, Rule, SLRGenerator, Token
1315

@@ -54,16 +56,86 @@ SLR PARSING TABLE
5456
-------------------------------------------------
5557
| | $ | + | E | S | T | x |
5658
-------------------------------------------------
57-
| 1 | -- | -- | s3 | -- | s4 | s2 |
59+
| 1 | -- | -- | s4 | -- | s2 | s3 |
5860
-------------------------------------------------
59-
| 2 | r4 | r4 | -- | -- | -- | -- |
61+
| 2 | r2 | s5 | -- | -- | -- | -- |
6062
-------------------------------------------------
61-
| 3 | a | -- | -- | -- | -- | -- |
62-
------------------------------------------------
63-
| 4 | r2 | s5 | -- | -- | -- | -- |
63+
| 3 | r4 | r4 | -- | -- | -- | -- |
6464
-------------------------------------------------
65-
| 5 | -- | -- | s6 | -- | s4 | s2 |
65+
| 4 | a | -- | -- | -- | -- | -- |
66+
-------------------------------------------------
67+
| 5 | -- | -- | s6 | -- | s2 | s3 |
6668
-------------------------------------------------
6769
| 6 | r1 | -- | -- | -- | -- | -- |
6870
-------------------------------------------------
6971
```
72+
73+
### Parsing
74+
75+
```py
76+
from syntactes import Grammar, Rule, Token
77+
from syntactes.parser import ParserError, SLRParser, execute_on
78+
79+
EOF = Token.eof()
80+
S = Token("S", is_terminal=False)
81+
E = Token("E", False)
82+
T = Token("T", False)
83+
x = Token("x", True, 1) # value of token is 1
84+
PLUS = Token("+", True)
85+
86+
tokens = {EOF, S, E, T, x, PLUS}
87+
88+
# 0. S -> E $
89+
# 1. E -> T + E
90+
# 2. E -> T
91+
# 4. T -> x
92+
rule_1 = Rule(0, S, E, EOF)
93+
rule_2 = Rule(1, E, T, PLUS, E)
94+
rule_3 = Rule(2, E, T)
95+
rule_4 = Rule(4, T, x)
96+
97+
rules = (rule_1, rule_2, rule_3, rule_4)
98+
99+
grammar = Grammar(rule_1, rules, tokens)
100+
101+
parser = SLRParser.from_grammar(grammar)
102+
103+
104+
@execute_on(rule_4)
105+
def push_value(x_token):
106+
# Add an argument for every token on the right-hand side of the rule.
107+
print(
108+
f"received token {x_token} with value: {x_token.value}, reducing by rule: {rule_4}"
109+
)
110+
111+
112+
@execute_on(rule_2)
113+
def add(left, plus, right):
114+
print(f"received tokens {left}, {plus}, {right}, reducing by rule: {rule_2}")
115+
116+
117+
print("Parsing stream: x + x + x $\n")
118+
parser.parse([x, PLUS, x, PLUS, x, EOF])
119+
120+
print("\nParsing stream: x + $\n")
121+
try:
122+
parser.parse([x, PLUS, EOF])
123+
except ParserError as e:
124+
print("ParserError:", e)
125+
```
126+
127+
Running the above example produces this output:
128+
```
129+
Parsing stream: x + x + x $
130+
131+
received token x with value: 1, reducing by rule: T -> x
132+
received token x with value: 1, reducing by rule: T -> x
133+
received token x with value: 1, reducing by rule: T -> x
134+
received tokens E, +, T, reducing by rule: E -> T + E
135+
received tokens E, +, T, reducing by rule: E -> T + E
136+
137+
Parsing stream: x + $
138+
139+
received token x with value: 1, reducing by rule: T -> x
140+
ParserError: Received token: $; expected one of: ['x', 'T', 'E']
141+
```

examples/parser.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
from syntactes import Grammar, Rule, Token
2+
from syntactes.parser import ParserError, SLRParser, execute_on
3+
4+
EOF = Token.eof()
5+
S = Token("S", is_terminal=False)
6+
E = Token("E", False)
7+
T = Token("T", False)
8+
x = Token("x", True, 1) # value of token is 1
9+
PLUS = Token("+", True)
10+
11+
tokens = {EOF, S, E, T, x, PLUS}
12+
13+
# 0. S -> E $
14+
# 1. E -> T + E
15+
# 2. E -> T
16+
# 4. T -> x
17+
rule_1 = Rule(0, S, E, EOF)
18+
rule_2 = Rule(1, E, T, PLUS, E)
19+
rule_3 = Rule(2, E, T)
20+
rule_4 = Rule(4, T, x)
21+
22+
rules = (rule_1, rule_2, rule_3, rule_4)
23+
24+
grammar = Grammar(rule_1, rules, tokens)
25+
26+
parser = SLRParser.from_grammar(grammar)
27+
28+
29+
@execute_on(rule_4)
30+
def push_value(x_token):
31+
# Add an argument for every token on the right-hand side of the rule.
32+
print(
33+
f"received token {x_token} with value: {x_token.value}, reducing by rule: {rule_4}"
34+
)
35+
36+
37+
@execute_on(rule_2)
38+
def add(left, plus, right):
39+
print(f"received tokens {left}, {plus}, {right}, reducing by rule: {rule_2}")
40+
41+
42+
print("Parsing stream: x + x + x $\n")
43+
parser.parse([x, PLUS, x, PLUS, x, EOF])
44+
45+
print("\nParsing stream: x + $\n")
46+
try:
47+
parser.parse([x, PLUS, EOF])
48+
except ParserError as e:
49+
print("ParserError:", e)
File renamed without changes.

src/syntactes/__init__.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
from .generator import LR0Generator, SLRGenerator
2-
from .grammar import Grammar
3-
from .rule import Rule
41
from .token import Token
2+
from .rule import Rule
3+
from .grammar import Grammar
4+
from .generator import LR0Generator, SLRGenerator
5+
from .table import LR0ParsingTable, SLRParsingTable

src/syntactes/_action.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,27 @@ def __init__(self, actionable: Actionable, action_type: ActionType) -> None:
4343
self.actionable = actionable
4444
self.action_type = action_type
4545

46+
@staticmethod
47+
def shift(state: Actionable) -> "Action":
48+
"""
49+
Create a shift action to the given state.
50+
"""
51+
return Action(state, ActionType.SHIFT)
52+
53+
@staticmethod
54+
def reduce(rule: Actionable) -> "Action":
55+
"""
56+
Create a reduce action of the given rule.
57+
"""
58+
return Action(rule, ActionType.REDUCE)
59+
60+
@staticmethod
61+
def accept() -> "Action":
62+
"""
63+
Create an accept action.
64+
"""
65+
return Action(None, ActionType.ACCEPT)
66+
4667
def __repr__(self) -> str:
4768
return f"<Action: {self}>"
4869

src/syntactes/_state.py

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,8 @@
1-
from typing import Iterable, Protocol
1+
from typing import Iterable
22

33
from syntactes._item import LR0Item
44

55

6-
class State(Protocol):
7-
"""
8-
State of parser automaton. A state is a set of items.
9-
"""
10-
11-
items: set
12-
13-
def __repr__(self) -> str: ...
14-
def __str__(self) -> str: ...
15-
def __hash__(self) -> int: ...
16-
def __eq__(self) -> bool: ...
17-
18-
196
class LR0State:
207
"""
218
State of LR0 parser. A LR0 state is a set of LR0 items.
@@ -24,6 +11,7 @@ class LR0State:
2411
def __init__(self) -> None:
2512
self.number = None
2613
self.items = set()
14+
self.is_final = False
2715

2816
@staticmethod
2917
def from_items(items: Iterable[LR0Item]) -> "LR0State":
@@ -44,6 +32,9 @@ def add_item(self, item: LR0Item) -> None:
4432
def set_number(self, number: int) -> None:
4533
self.number = number
4634

35+
def set_final(self) -> None:
36+
self.is_final = True
37+
4738
def __repr__(self) -> str:
4839
return f"<LR0State: {self.number}>"
4940

src/syntactes/generator.py

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
1+
from syntactes import Grammar, Token
12
from syntactes._action import Action, ActionType
23
from syntactes._item import LR0Item
34
from syntactes._state import LR0State
4-
from syntactes.grammar import Grammar
55
from syntactes.table import Entry, LR0ParsingTable, SLRParsingTable
6-
from syntactes.token import Token
76

87

98
class LR0Generator:
@@ -191,7 +190,11 @@ def _extend_states_and_shift_entries(
191190
EOF = Token.eof()
192191
for state in states:
193192
for item in state.items:
194-
if item.dot_is_last() or item.after_dot == EOF:
193+
if item.dot_is_last():
194+
continue
195+
196+
if item.after_dot == EOF:
197+
state.set_final()
195198
continue
196199

197200
new_items = self.goto(state.items, item.after_dot)
@@ -204,8 +207,7 @@ def _extend_states_and_shift_entries(
204207
number = _states.setdefault(new, len(_states) + 1)
205208
new.set_number(number)
206209

207-
action = Action(new, ActionType.SHIFT)
208-
_entries.add(Entry(state, item.after_dot, action))
210+
_entries.add(Entry(state, item.after_dot, Action.shift(new)))
209211

210212
return _states, _entries
211213

@@ -218,16 +220,14 @@ def _create_reduce_entries(self, states: set[LR0State]) -> set[Entry]:
218220
for state in states:
219221
for item in state.items:
220222
if item.after_dot == Token.eof():
221-
action = Action(item.rule, ActionType.ACCEPT)
222-
entries.add(Entry(state, Token.eof(), action))
223+
entries.add(Entry(state, Token.eof(), Action.accept()))
223224

224225
if not item.dot_is_last():
225226
continue
226227

227-
action = Action(item.rule, ActionType.REDUCE)
228228
for token in self.grammar.tokens:
229229
if token.is_terminal:
230-
entries.add(Entry(state, token, action))
230+
entries.add(Entry(state, token, Action.reduce(item.rule)))
231231

232232
return entries
233233

@@ -255,14 +255,12 @@ def _create_reduce_entries(self, states: set[LR0State]) -> set[Entry]:
255255
for state in states:
256256
for item in state.items:
257257
if item.after_dot == Token.eof():
258-
action = Action(item.rule, ActionType.ACCEPT)
259-
entries.add(Entry(state, Token.eof(), action))
258+
entries.add(Entry(state, Token.eof(), Action.accept()))
260259

261260
if not item.dot_is_last():
262261
continue
263262

264-
action = Action(item.rule, ActionType.REDUCE)
265263
for token in self._follow(item.rule.lhs):
266-
entries.add(Entry(state, token, action))
264+
entries.add(Entry(state, token, Action.reduce(item.rule)))
267265

268266
return entries

src/syntactes/grammar.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
from typing import Iterable
22

3-
from syntactes.rule import Rule
4-
from syntactes.token import Token
3+
from syntactes import Rule, Token
54

65

76
class Grammar:

src/syntactes/parser/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from .exception import NotAcceptedError, ParserError, UnexpectedTokenError
2+
from .execute import ExecutablesRegistry, execute_on
3+
from .parser import LR0Parser, SLRParser

0 commit comments

Comments
 (0)