forked from ShivamSarodia/ShivyC
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathutils.py
More file actions
209 lines (154 loc) · 6.32 KB
/
utils.py
File metadata and controls
209 lines (154 loc) · 6.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
"""Utilities for the parser."""
from contextlib import contextmanager
import copy
import functools

from shivyc.errors import CompilerError, Range
# This is a little bit messy, but worth the repetition it saves. In the
# parser.py file, the main parse function sets this global variable to the
# list of tokens. Then, all functions in the parser can reference this
# variable rather than passing around the tokens list everywhere.
#
# The helpers defined in this module (token_is, token_in, match_token,
# token_range and raise_error) read this variable directly instead of
# accepting the token list as a parameter.
tokens = None
class SimpleSymbolTable:
    """Scoped table recording every declared symbol.

    Parsing typedefs in C requires knowing whether an identifier names a
    type or a value. For each declared identifier this table stores a flag
    saying whether or not the identifier is a type definition. Scopes are
    kept as a stack of dictionaries, innermost scope last.
    """

    def __init__(self):
        """Create the table with a single (outermost) scope."""
        self.symbols = []
        self.new_scope()

    def new_scope(self):
        """Open a new innermost scope."""
        self.symbols.append(dict())

    def end_scope(self):
        """Discard the innermost scope and everything declared in it."""
        self.symbols.pop()

    def add_symbol(self, identifier, is_typedef):
        """Record `identifier` in the innermost scope.

        identifier - Object whose `content` attribute is the symbol name.
        is_typedef - Value stored for the name; returned by is_typedef().
        """
        innermost = self.symbols[-1]
        innermost[identifier.content] = is_typedef

    def is_typedef(self, identifier):
        """Return the flag stored for `identifier`, or False if undeclared.

        Scopes are searched from the innermost outwards, so a declaration
        in an inner scope shadows one in an enclosing scope.
        """
        name = identifier.content
        for scope in reversed(self.symbols):
            if name in scope:
                return scope[name]
        return False
# Shared module-level symbol table. log_error() deep-copies it before running
# a speculative parse and puts the copy back if that parse raises ParserError.
symbols = SimpleSymbolTable()
class ParserError(CompilerError):
    """Class representing parser errors.

    amount_parsed (int) - Number of tokens successfully parsed before this
    error was encountered. This value is used by the Parser to determine
    which error corresponds to the most successful parse.
    """

    # Options for the message_type constructor field.
    #
    # AT generates a message like "expected semicolon at '}'", GOT generates
    # a message like "expected semicolon, got '}'", and AFTER generates a
    # message like "expected semicolon after '15'" (if possible).
    #
    # As a very general guide, use AT when a token should be removed, use
    # AFTER when a token should be inserted (esp. because of what came
    # before), and GOT when a token should be changed.
    AT = 1
    GOT = 2
    AFTER = 3

    def __init__(self, message, index, tokens, message_type):
        """Initialize a ParserError from the given arguments.

        message (str) - Base message to put in the error.
        index (int) - Index of the offending token.
        tokens (List[Token]) - List of tokens.
        message_type (int) - One of self.AT, self.GOT, or self.AFTER.

        Example:
            ParserError("unexpected semicolon", 10, [...], self.AT)
               -> CompilerError("unexpected semicolon at ';'", ..., ...)
               -> "main.c:10: unexpected semicolon at ';'"
        """
        # Record the caller's index before it is clamped below.
        self.amount_parsed = index

        token_count = len(tokens)
        if token_count == 0:
            # There is no token to point at.
            super().__init__(f"{message} at beginning of source")
            return

        if index >= token_count:
            # Past the last token: only the AFTER form can be used.
            index, message_type = token_count, self.AFTER
        elif index <= 0:
            # Nothing precedes the first token, so AFTER degrades to GOT.
            index = 0
            if message_type == self.AFTER:
                message_type = self.GOT

        if message_type == self.AFTER:
            previous = tokens[index - 1]
            # Start the range one past the end of the preceding token's
            # range, when that token has a range at all.
            after_range = Range(previous.r.end + 1) if previous.r else None
            super().__init__(f"{message} after '{previous}'", after_range)
        elif message_type == self.AT:
            current = tokens[index]
            super().__init__(f"{message} at '{current}'", current.r)
        elif message_type == self.GOT:
            current = tokens[index]
            super().__init__(f"{message}, got '{current}'", current.r)
def raise_error(err, index, error_type):
    """Raise a ParserError built from the module-level token list.

    err (str) - Base message for the error.
    index (int) - Index of the offending token.
    error_type (int) - One of ParserError.AT, ParserError.GOT or
    ParserError.AFTER.
    """
    raise ParserError(err, index, tokens, message_type=error_type)
# Used to store the best error found in the parsing phase. log_error()
# overwrites it when nothing is stored yet, or when it catches a ParserError
# whose amount_parsed is at least that of the error currently stored here.
best_error = None
@contextmanager
def log_error():
    """Context manager for speculative parsing attempts.

    For example,

        with log_error():
            [try parsing something]
            return

        [try parsing something else]

    runs [try parsing something]. If that raises a ParserError, the error
    is swallowed and remembered, and [try parsing something else] runs
    next. The error's amount_parsed decides whether it replaces the best
    error remembered so far. Exceptions other than ParserError propagate.
    """
    global best_error, symbols

    # Snapshot the symbol table so a failed attempt leaves no declarations
    # behind.
    saved_symbols = copy.deepcopy(symbols)
    try:
        yield
    except ParserError as failure:
        # Ties go to the newer error.
        is_new_best = (
            not best_error
            or failure.amount_parsed >= best_error.amount_parsed
        )
        if is_new_best:
            best_error = failure
        symbols = saved_symbols
def token_is(index, kind):
    """Return whether tokens[index] exists and is of the given kind.

    An index at or beyond the end of the token list yields False rather
    than raising.
    """
    if index >= len(tokens):
        return False
    return tokens[index].kind == kind
def token_in(index, kinds):
    """Return whether tokens[index] exists and its kind is in `kinds`.

    `kinds` may be any container supporting `in` (e.g. a list or a set).
    An index at or beyond the end of the token list yields False rather
    than raising.
    """
    if index >= len(tokens):
        return False
    return tokens[index].kind in kinds
def match_token(index, kind, message_type, message=None):
    """Check that tokens[index] is of the expected kind.

    Returns index + 1 when tokens[index] is of the expected kind.
    Otherwise raises a ParserError carrying `message` and `message_type`.
    When no message is supplied, one is built from `kind.text_repr`.
    """
    message = message or f"expected '{kind.text_repr}'"

    if not token_is(index, kind):
        raise ParserError(message, index, tokens, message_type)
    return index + 1
def token_range(start, end):
    """Generate a range that encompasses tokens[start] to tokens[end-1].

    Both boundaries are clamped into the valid index range of the
    module-level token list. A span that ends past the last token is cut
    off at the last token, and a span that would end before the first
    token (e.g. start == end == 0) is anchored at the first token rather
    than wrapping around to the last one via a negative index.

    start (int) - Index of the first token in the span.
    end (int) - Index one past the last token in the span.

    Returns the `+` combination of the `r` attributes of the two boundary
    tokens, or None when there are no tokens to take a range from.
    """
    if not tokens:
        # No token exists, so there is no range to report.
        return None

    last = len(tokens) - 1
    # Clamp to [0, last]; without the lower bound, end == 0 would give -1
    # and silently select the last token.
    end_index = max(0, min(end - 1, last))
    start_index = max(0, min(start, end_index))
    return tokens[start_index].r + tokens[end_index].r
def add_range(parse_func):
    """Return a decorated function that tags the produced node with a range.

    Accepts a parse_* function, and returns a version of the function where
    the returned node has its range attribute set.

    parse_func - Callable taking a token index as its first argument and
    returning a (node, end_index) pair.

    The wrapper keeps parse_func's name and docstring, and forwards any
    extra positional or keyword arguments to it unchanged.
    """
    @functools.wraps(parse_func)
    def parse_with_range(index, *args, **kwargs):
        node, end_index = parse_func(index, *args, **kwargs)
        # The node covers the tokens from the starting index up to, but
        # not including, the index at which parse_func stopped.
        node.r = token_range(index, end_index)
        return node, end_index

    return parse_with_range