-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmypl_lexer.py
More file actions
286 lines (264 loc) · 10.7 KB
/
mypl_lexer.py
File metadata and controls
286 lines (264 loc) · 10.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
#!/usr/bin/python3
#
# Author: Vincent Lombardi
# Course: CPSC 326, Spring 2019
# Assignment: 3
# Description: This file splits up source code into tokens
# and it checks for basic lexical errors.
# --------------------------------------------------------
import mypl_token as token
import mypl_error as error
class Lexer(object):
    """Split a MyPL character stream into tokens, one per ``next_token`` call.

    ``input_stream`` must be a seekable text stream: one-character lookahead
    is implemented with ``tell`` / ``read(1)`` / ``seek``.

    Attributes:
        line: 1-based number of the line currently being read.
        column: 1-based column of the first character of the token being
            built; after a token is returned it holds ``column_index`` (or 0
            once EOS has been returned).
        column_index: number of characters consumed so far on ``line``.
        input_stream: the stream being tokenized.

    Lexical errors are raised as ``error.MyPLError(message, line, column)``.
    """

    # Punctuation that ends an identifier, keyword or number.  End of stream
    # and whitespace also end a lexeme; check_char handles those separately.
    _TERMINATORS = frozenset(",=#><!':/.()-%*+;")

    # Fixed lexeme -> name of the matching constant in the token module.
    # Names are resolved with getattr at lookup time, so this table does not
    # need the token module while the class body is being executed.
    _TOKEN_NAMES = {
        "=": "ASSIGN",
        ",": "COMMA",
        ":": "COLON",
        "/": "DIVIDE",
        ".": "DOT",
        "==": "EQUAL",
        ">": "GREATER_THAN",
        ">=": "GREATER_THAN_EQUAL",
        "<": "LESS_THAN",
        "<=": "LESS_THAN_EQUAL",
        "!=": "NOT_EQUAL",
        "(": "LPAREN",
        ")": "RPAREN",
        "-": "MINUS",
        "%": "MODULO",
        "*": "MULTIPLY",
        "+": "PLUS",
        ";": "SEMICOLON",
        "true": "BOOLVAL",
        "false": "BOOLVAL",
        "bool": "BOOLTYPE",
        "int": "INTTYPE",
        "float": "FLOATTYPE",
        "string": "STRINGTYPE",
        "struct": "STRUCTTYPE",
        "and": "AND",
        "or": "OR",
        "not": "NOT",
        "while": "WHILE",
        "do": "DO",
        "if": "IF",
        "then": "THEN",
        "else": "ELSE",
        "elif": "ELIF",
        "end": "END",
        "fun": "FUN",
        "var": "VAR",
        "set": "SET",
        "return": "RETURN",
        "new": "NEW",
        "nil": "NIL",
    }

    def __init__(self, input_stream):
        """Start lexing ``input_stream`` at line 1 with nothing consumed."""
        self.line = 1
        self.column = 0  # column of the current token's first character
        self.input_stream = input_stream
        self.column_index = 0  # characters consumed on the current line

    def __peek(self):
        """Return the next character without consuming it ('' at EOS)."""
        pos = self.input_stream.tell()
        symbol = self.input_stream.read(1)
        self.input_stream.seek(pos)
        return symbol

    def __read(self):
        """Consume and return one character, keeping the position counters.

        A newline advances ``line`` and resets ``column`` and
        ``column_index``; anything else (including the empty read at EOS)
        advances ``column_index`` by one.
        """
        symbol = self.input_stream.read(1)
        if symbol == '\n':
            self.line += 1
            self.column = 0
            self.column_index = 0
        else:
            self.column_index += 1
        return symbol

    def check_char(self):
        """Report whether the next character ends the current lexeme.

        Returns:
            0 if the next character is end of stream, whitespace, or one of
            the punctuation terminators; 1 otherwise.
        """
        symbol = self.__peek()
        # Every whitespace character terminates, not only ' ' and '\n', so
        # tabs and carriage returns are never absorbed into a lexeme.
        if symbol == '' or symbol.isspace() or symbol in self._TERMINATORS:
            return 0
        return 1

    def __comment_check(self):
        """Skip '#' comments together with the whitespace that follows them."""
        while self.__peek() == '#':
            while self.__peek() not in ('\n', ''):
                self.__read()
            # Only consume a newline that is really there; reading at EOS
            # would advance column_index without consuming a character.
            if self.__peek() == '\n':
                self.__read()
            while self.__peek().isspace():
                self.__read()

    def __fail(self, detail, column):
        """Raise a MyPLError for the current line with the standard prefix."""
        raise error.MyPLError("Lexer Error " + detail, self.line, column)

    def __make_token(self, tokentype, lexeme):
        """Build the token, then leave ``column`` in its post-token state."""
        final_token = token.Token(tokentype, lexeme, self.line, self.column)
        self.column = 0 if tokentype == token.EOS else self.column_index
        return final_token

    def __read_string(self):
        """Consume a double-quoted string and return its contents.

        Raises:
            error.MyPLError: a newline or EOS occurs before the closing quote.
        """
        self.__read()  # opening quote
        chars = []
        while True:
            symbol = self.__peek()
            if symbol == '"':
                break
            if symbol == '\n':
                self.__fail("reached newline character in string",
                            self.column_index)
            if symbol == '':
                self.__fail("reached EOS character in string",
                            self.column_index)
            chars.append(self.__read())
        self.__read()  # closing quote
        return ''.join(chars)

    def __read_symbol(self):
        """Consume a one- or two-character operator/punctuation lexeme.

        Raises:
            error.MyPLError: a '.' is immediately followed by a digit.
        """
        lexeme = self.__read()
        if lexeme == '.' and self.__peek().isnumeric():
            self.__fail("invalid float value", self.column_index - 1)
        if lexeme in ('=', '>', '<', '!') and self.__peek() == '=':
            lexeme += self.__read()
        return lexeme

    def __read_word(self):
        """Consume an identifier, keyword or run of digits and return it.

        Raises:
            error.MyPLError: a number starts with '0' followed by more
                characters, or a non-digit appears inside a number.
        """
        lexeme = self.__read()
        isnum = lexeme.isnumeric()
        if self.check_char() == 1:
            if isnum and lexeme == '0':
                self.__fail("unexpected symbol '" + self.__peek() + "'",
                            self.column_index)
            while self.check_char() != 0 and self.__peek() != '"':
                lexeme += self.__read()
                # Validate right after each read so a trailing non-digit
                # ("1a;") is rejected just like an interior one ("1ab;").
                if isnum and not lexeme.isnumeric():
                    self.__fail("unexpected value '" + lexeme[-1] + "'",
                                self.column_index - 1)
        return lexeme

    def __finish_number(self, digits):
        """Turn ``digits`` into an INTVAL token, or a FLOATVAL if a '.' follows.

        Raises:
            error.MyPLError: the '.' is not followed by a digit, or a
                non-digit (including a second '.') appears in the fraction.
        """
        if self.__peek() != '.':
            return self.__make_token(token.INTVAL, digits)
        lexeme = digits + self.__read()
        if not self.__peek().isnumeric():
            self.__fail("missing digit in float value", self.column_index - 1)
        # A second '.' is a terminator for check_char, so it is let into the
        # loop explicitly in order to be reported as an error.
        while self.check_char() != 0 or self.__peek() == '.':
            symbol = self.__peek()
            if not symbol.isnumeric():
                self.__fail("unexpected character '" + symbol + "'",
                            self.column_index + 1)
            lexeme += self.__read()
        return self.__make_token(token.FLOATVAL, lexeme)

    def next_token(self):
        """Return the next token from the stream.

        Whitespace and '#' comments before the token are skipped.  The
        token's column is the 1-based character position of its first
        character on its line (a tab counts as one character).

        Returns:
            ``token.Token(tokentype, lexeme, line, column)``.  At end of
            stream the token type is ``token.EOS`` and the lexeme is ''.
            Lexemes that match no fixed token or keyword get ``token.ID``.

        Raises:
            error.MyPLError: on a malformed number or an unterminated string.
        """
        while self.__peek().isspace():
            self.__read()
        self.__comment_check()

        # Derive the start column from the characters actually consumed on
        # this line, so tabs and whitespace skipped after a comment count.
        self.column = self.column_index + 1

        symbol = self.__peek()
        if symbol == '':
            return self.__make_token(token.EOS, '')
        if symbol == '"':
            return self.__make_token(token.STRINGVAL, self.__read_string())

        if self.check_char() == 0:
            lexeme = self.__read_symbol()
        else:
            lexeme = self.__read_word()
            if lexeme.isnumeric():
                return self.__finish_number(lexeme)

        name = self._TOKEN_NAMES.get(lexeme)
        tokentype = token.ID if name is None else getattr(token, name)
        return self.__make_token(tokentype, lexeme)