@@ -305,11 +305,22 @@ def compile_tokens(token_list):
305305
306306
def is_symbol_name(text):
    """
    Returns ``True`` if ``text`` is a valid identifier, as judged by
    ``full_symbol_pattern``. Otherwise returns ``False``.

    Every match of ``full_symbol_pattern`` is deleted from ``text``; the
    answer is ``True`` exactly when nothing is left over. An empty ``text``
    therefore yields ``True`` as well.
    """
    # NOTE(review): a plain ``match`` call only anchors at the start of
    # ``text`` and may treat the empty string differently, so it is not
    # obviously a drop-in replacement -- check the pattern before switching.
    leftover = full_symbol_pattern.sub("", text)
    return leftover == ""
309314
310315
311316class Token (object ):
317+ "A representation of a Wolfram Language token"
312318 def __init__ (self , tag , text , pos ):
319+ """
320+ @param: tag A string that indicates which type of token this is.
321+ @param: text The actual contents of the token.
322+ @param: pos The position of the token in the input feed.
323+ """
313324 self .tag = tag
314325 self .text = text
315326 self .pos = pos
@@ -326,28 +337,53 @@ def __repr__(self):
326337
327338
328339class Tokeniser (object ):
340+ """
341+ A tokeniser for the Wolfram Language.
342+
343+ When subclassing ``Tokeniser``, custom tokenisation rules can be defined by
344+ declaring methods whose names are preceded by ``t_``, such as in the
345+ following example: ::
346+
347+ class MyTokeniser(Tokeniser):
348+ def t_MyWeirdRule(self, match):
349+ # Your logic goes here...
350+ pass
351+
352+ In this example, ``t_MyWeirdRule`` is supposed to update the internal state
353+ of the tokeniser and return a ``Token`` with an appropriate tag. ``match``
354+ is expected to be an instance of ``re.Match``.
355+ """
329356 modes = {
330357 "expr" : (tokens , token_indices ),
331358 "filename" : (filename_tokens , {}),
332359 }
333360
334361 def __init__ (self , feeder ):
362+ """
363+ @param: feeder An instance of ``LineFeeder`` which will feed characters
364+ to the tokeniser.
365+ """
335366 self .pos = 0
336367 self .feeder = feeder
337368 self .prescanner = Prescanner (feeder )
338369 self .code = self .prescanner .scan ()
339- self .change_mode ("expr" )
370+ self ._change_mode ("expr" )
340371
341- def change_mode (self , mode ):
372+ def _change_mode (self , mode ):
373+ """
374+ Set the mode of the tokeniser
375+ """
342376 self .mode = mode
343377 self .tokens , self .token_indices = self .modes [mode ]
344378
379+ # TODO: Rename this to something that remotely makes sense?
345380 def incomplete (self ):
346- "get more code from the prescanner and continue"
381+ "Get more code from the prescanner and continue"
347382 self .prescanner .incomplete ()
348383 self .code += self .prescanner .scan ()
349384
350385 def sntx_message (self , pos = None ):
386+ """Send a message to the feeder."""
351387 if pos is None :
352388 pos = self .pos
353389 pre , post = self .code [:pos ], self .code [pos :].rstrip ("\n " )
@@ -356,9 +392,10 @@ def sntx_message(self, pos=None):
356392 else :
357393 self .feeder .message ("Syntax" , "sntxf" , pre , post )
358394
395+ # TODO: Convert this to __next__ in the future?
359396 def next (self ):
360- "return next token"
361- self .skip_blank ()
397+ "Returns the next token"
398+ self ._skip_blank ()
362399 if self .pos >= len (self .code ):
363400 return Token ("END" , "" , len (self .code ))
364401
@@ -390,8 +427,8 @@ def next(self):
390427 self .pos = match .end (0 )
391428 return Token (tag , text , match .start (0 ))
392429
393- def skip_blank (self ):
394- "skip whitespace and comments"
430+ def _skip_blank (self ):
431+ "Skip whitespace and comments"
395432 comment = [] # start positions of comments
396433 while True :
397434 if self .pos >= len (self .code ):
@@ -417,6 +454,7 @@ def skip_blank(self):
417454 break
418455
419456 def t_String (self , match ):
457+ "String rule"
420458 start , end = self .pos , None
421459 self .pos += 1 # skip opening '"'
422460 newlines = []
@@ -444,6 +482,7 @@ def t_String(self, match):
444482 return Token ("String" , result , start )
445483
446484 def t_Number (self , match ):
485+ "Number rule"
447486 text = match .group (0 )
448487 pos = match .end (0 )
449488 if self .code [pos - 1 : pos + 1 ] == ".." :
@@ -454,21 +493,27 @@ def t_Number(self, match):
454493 self .pos = pos
455494 return Token ("Number" , text , match .start (0 ))
456495
457- def token_mode (self , match , tag , mode ):
496+ # This isn't outside of here so it's considered internal
497+ def _token_mode (self , match , tag , mode ):
458498 "consume a token and switch mode"
459499 text = match .group (0 )
460500 self .pos = match .end (0 )
461- self .change_mode (mode )
501+ self ._change_mode (mode )
462502 return Token (tag , text , match .start (0 ))
463503
464504 def t_Get (self , match ):
465- return self .token_mode (match , "Get" , "filename" )
505+ "Get rule"
506+ return self ._token_mode (match , "Get" , "filename" )
466507
467508 def t_Put (self , match ):
468- return self .token_mode (match , "Put" , "filename" )
509+ "Put rule"
510+ return self ._token_mode (match , "Put" , "filename" )
469511
470512 def t_PutAppend (self , match ):
471- return self .token_mode (match , "PutAppend" , "filename" )
513+ "PutAppend rule"
514+ return self ._token_mode (match , "PutAppend" , "filename" )
472515
473516 def t_Filename (self , match ):
474- return self .token_mode (match , "Filename" , "expr" )
517+ "Filename rule"
518+ return self ._token_mode (match , "Filename" , "expr" )
519+
0 commit comments