@@ -305,11 +305,22 @@ def compile_tokens(token_list):
305305
306306
def is_symbol_name(text):
    """
    Return ``True`` if ``text`` is a valid identifier, ``False``
    otherwise.
    """
    # ``full_symbol_pattern`` removes every run of characters that forms a
    # valid symbol name, so a fully valid identifier reduces to the empty
    # string.  NOTE(review): a single anchored ``match`` call might suffice
    # here — confirm the pattern is anchored before switching.
    leftover = full_symbol_pattern.sub("", text)
    return not leftover
309314
310315
311316class Token (object ):
317+ "A representation of a Wolfram Language token"
312318 def __init__ (self , tag , text , pos ):
319+ """
320+ @param: tag A string that indicates which type of token this is.
321+ @param: text The actual contents of the token.
322+ @param: pos The position of the token in the input feed.
323+ """
313324 self .tag = tag
314325 self .text = text
315326 self .pos = pos
@@ -326,28 +337,54 @@ def __repr__(self):
326337
327338
328339class Tokeniser (object ):
340+ """
341+ A tokenizer for the Wolfram Language.
342+
343+ When subclassing ``Tokeniser``, custom tokenisation rules can be defined by
344+ declaring methods whose names are preceded by ``t_``, such as in the
345+ following example: ::
346+
347+ class MyTokeniser(Tokeniser):
348+ def t_MyWeirdRule(self, match):
349+ # Your logic goes here...
350+ pass
351+
352+ In this example, ``t_MyWeirdRule`` is supposed to update the internal state
353+ of the tokeniser and return a ``Token`` with an appropriate tag. ``match``
354+ is expected to be an instance of ``re.Match``.
355+ """
329356 modes = {
330357 "expr" : (tokens , token_indices ),
331358 "filename" : (filename_tokens , {}),
332359 }
333360
def __init__(self, feeder):
    """
    :param feeder: a ``LineFeeder`` instance that supplies characters
        to the tokeniser.
    """
    self.feeder = feeder
    self.pos = 0
    # Run the prescanner eagerly so that ``self.code`` is populated
    # before the first token is requested.
    self.prescanner = Prescanner(feeder)
    self.code = self.prescanner.scan()
    self.change_mode("expr")
340371
# TODO: expose this as a property setter in the future?
def change_mode(self, mode):
    """
    Switch the tokeniser to ``mode`` and install the matching token
    tables from ``self.modes``.
    """
    self.mode = mode
    token_table, index_table = self.modes[mode]
    self.tokens = token_table
    self.token_indices = index_table
344379
# TODO: Rename this to something that remotely makes sense?
def incomplete(self):
    "Ask the prescanner for more code and append it to the buffer."
    self.prescanner.incomplete()
    extra = self.prescanner.scan()
    self.code = self.code + extra
349385
350386 def sntx_message (self , pos = None ):
387+ """Send a message to the feeder."""
351388 if pos is None :
352389 pos = self .pos
353390 pre , post = self .code [:pos ], self .code [pos :].rstrip ("\n " )
@@ -356,8 +393,9 @@ def sntx_message(self, pos=None):
356393 else :
357394 self .feeder .message ("Syntax" , "sntxf" , pre , post )
358395
396+ # TODO: Convert this to __next__ in the future?
359397 def next (self ):
360- "return next token"
398+ "Returns the next token"
361399 self .skip_blank ()
362400 if self .pos >= len (self .code ):
363401 return Token ("END" , "" , len (self .code ))
@@ -391,7 +429,7 @@ def next(self):
391429 return Token (tag , text , match .start (0 ))
392430
393431 def skip_blank (self ):
394- "skip whitespace and comments"
432+ "Skip whitespace and comments"
395433 comment = [] # start positions of comments
396434 while True :
397435 if self .pos >= len (self .code ):
@@ -417,6 +455,7 @@ def skip_blank(self):
417455 break
418456
419457 def t_String (self , match ):
458+ "``String`` tokenizer"
420459 start , end = self .pos , None
421460 self .pos += 1 # skip opening '"'
422461 newlines = []
@@ -444,6 +483,7 @@ def t_String(self, match):
444483 return Token ("String" , result , start )
445484
446485 def t_Number (self , match ):
486+ "Number tag"
447487 text = match .group (0 )
448488 pos = match .end (0 )
449489 if self .code [pos - 1 : pos + 1 ] == ".." :
@@ -454,21 +494,27 @@ def t_Number(self, match):
454494 self .pos = pos
455495 return Token ("Number" , text , match .start (0 ))
456496
# Leading underscore: this helper is internal to the tokeniser.
def _token_mode(self, match, tag, mode):
    """
    Consume the matched text, switch the tokeniser to ``mode``, and
    return the corresponding ``Token``.

    :param match: an ``re.Match`` covering the token text.
    :param tag: tag carried by the returned ``Token``.
    :param mode: tokeniser mode to switch into.
    """
    consumed = match.group(0)
    self.pos = match.end(0)
    self.change_mode(mode)
    return Token(tag, consumed, match.start(0))
463504
def t_Get(self, match):
    "Tokenise ``Get``; subsequent input is read in filename mode."
    return self._token_mode(match, "Get", "filename")
466508
def t_Put(self, match):
    "Tokenise ``Put``; subsequent input is read in filename mode."
    return self._token_mode(match, "Put", "filename")
469512
def t_PutAppend(self, match):
    "Tokenise ``PutAppend``; subsequent input is read in filename mode."
    return self._token_mode(match, "PutAppend", "filename")
472516
def t_Filename(self, match):
    "Tokenise a ``Filename``; the tokeniser returns to expression mode."
    return self._token_mode(match, "Filename", "expr")
520+
0 commit comments