Skip to content

Commit 6ef9a0b

Browse files
committed
Documented stuff used by Mathics
1 parent 295fe97 commit 6ef9a0b

File tree

2 files changed

+60
-10
lines changed

2 files changed

+60
-10
lines changed

mathics_scanner/errors.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,21 @@
33

44

55
class TranslateError(Exception):
6-
def __init__(self):
7-
pass
6+
"""A generic class of tokenizing errors"""
7+
pass
88

99

1010
class ScanError(TranslateError):
11+
"""A generic scanning error"""
1112
pass
1213

1314

1415
class InvalidSyntaxError(TranslateError):
16+
"""Invalid syntax"""
1517
pass
1618

1719

1820
class IncompleteSyntaxError(TranslateError):
21+
"""More characters were expected to form a valid token"""
1922
pass
23+

mathics_scanner/tokeniser.py

Lines changed: 54 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -305,11 +305,22 @@ def compile_tokens(token_list):
305305

306306

307307
def is_symbol_name(text):
308+
"""
309+
Returns ``True`` if ``text`` is a valid identifier. Otherwise returns
310+
``False``.
311+
"""
312+
# Can't we just call match here?
308313
return full_symbol_pattern.sub("", text) == ""
309314

310315

311316
class Token(object):
317+
"A representation of a Wolfram Language token"
312318
def __init__(self, tag, text, pos):
319+
"""
320+
@param: tag A string that indicates which type of token this is.
321+
@param: text The actual contents of the token.
322+
@param: pos The position of the token in the input feed.
323+
"""
313324
self.tag = tag
314325
self.text = text
315326
self.pos = pos
@@ -326,28 +337,54 @@ def __repr__(self):
326337

327338

328339
class Tokeniser(object):
340+
"""
341+
A tokenizer for the Wolfram Language.
342+
343+
When subclassing ``Tokeniser``, custom tokenisation rules can be defined by
344+
declaring methods whose names are preceded by ``t_``, such as in the
345+
following example: ::
346+
347+
class MyTokeniser(Tokeniser):
348+
def t_MyWeirdRule(self, match):
349+
# Your logic goes here...
350+
pass
351+
352+
In this example, ``t_MyWeirdRule`` is supposed to update the internal state
353+
of the tokeniser and return a ``Token`` with an appropriate tag. ``match``
354+
is expected to be an instance of ``re.Match``.
355+
"""
329356
modes = {
330357
"expr": (tokens, token_indices),
331358
"filename": (filename_tokens, {}),
332359
}
333360

334361
def __init__(self, feeder):
362+
"""
363+
@param: feeder An instance of ``LineFeeder`` which will feed characters
364+
to the tokenizer.
365+
"""
335366
self.pos = 0
336367
self.feeder = feeder
337368
self.prescanner = Prescanner(feeder)
338369
self.code = self.prescanner.scan()
339370
self.change_mode("expr")
340371

372+
# TODO: Turn this into a setter in the future?
341373
def change_mode(self, mode):
374+
"""
375+
Set the mode of the tokenizer
376+
"""
342377
self.mode = mode
343378
self.tokens, self.token_indices = self.modes[mode]
344379

380+
# TODO: Rename this to something that remotely makes sense?
345381
def incomplete(self):
346-
"get more code from the prescanner and continue"
382+
"Get more code from the prescanner and continue"
347383
self.prescanner.incomplete()
348384
self.code += self.prescanner.scan()
349385

350386
def sntx_message(self, pos=None):
387+
"""Send a message to the feeder."""
351388
if pos is None:
352389
pos = self.pos
353390
pre, post = self.code[:pos], self.code[pos:].rstrip("\n")
@@ -356,8 +393,9 @@ def sntx_message(self, pos=None):
356393
else:
357394
self.feeder.message("Syntax", "sntxf", pre, post)
358395

396+
# TODO: Convert this to __next__ in the future?
359397
def next(self):
360-
"return next token"
398+
"Returns the next token"
361399
self.skip_blank()
362400
if self.pos >= len(self.code):
363401
return Token("END", "", len(self.code))
@@ -391,7 +429,7 @@ def next(self):
391429
return Token(tag, text, match.start(0))
392430

393431
def skip_blank(self):
394-
"skip whitespace and comments"
432+
"Skip whitespace and comments"
395433
comment = [] # start positions of comments
396434
while True:
397435
if self.pos >= len(self.code):
@@ -417,6 +455,7 @@ def skip_blank(self):
417455
break
418456

419457
def t_String(self, match):
458+
"``String`` tokenizer"
420459
start, end = self.pos, None
421460
self.pos += 1 # skip opening '"'
422461
newlines = []
@@ -444,6 +483,7 @@ def t_String(self, match):
444483
return Token("String", result, start)
445484

446485
def t_Number(self, match):
486+
"Number tag"
447487
text = match.group(0)
448488
pos = match.end(0)
449489
if self.code[pos - 1 : pos + 1] == "..":
@@ -454,21 +494,27 @@ def t_Number(self, match):
454494
self.pos = pos
455495
return Token("Number", text, match.start(0))
456496

457-
def token_mode(self, match, tag, mode):
497+
# This isn't used outside of here, so it's considered internal
498+
def _token_mode(self, match, tag, mode):
458499
"consume a token and switch mode"
459500
text = match.group(0)
460501
self.pos = match.end(0)
461502
self.change_mode(mode)
462503
return Token(tag, text, match.start(0))
463504

464505
def t_Get(self, match):
465-
return self.token_mode(match, "Get", "filename")
506+
"Get tag"
507+
return self._token_mode(match, "Get", "filename")
466508

467509
def t_Put(self, match):
468-
return self.token_mode(match, "Put", "filename")
510+
"Put tag"
511+
return self._token_mode(match, "Put", "filename")
469512

470513
def t_PutAppend(self, match):
471-
return self.token_mode(match, "PutAppend", "filename")
514+
"PutAppend tag"
515+
return self._token_mode(match, "PutAppend", "filename")
472516

473517
def t_Filename(self, match):
474-
return self.token_mode(match, "Filename", "expr")
518+
"Filename tag"
519+
return self._token_mode(match, "Filename", "expr")
520+

0 commit comments

Comments
 (0)