2626 MessageField ,
2727 Option ,
2828 Proto ,
29+ Reference ,
2930 Scope ,
3031 Type ,
3132)
@@ -189,12 +190,9 @@ def util_parse_sequence(self, p: P) -> None:
189190 p [0 ] = []
190191
191192 def copy_p_tracking (self , p : P , from_ : int = 1 , to : int = 0 ) -> None :
192- """Don't know why P's tracking info (lexpos and lineno) sometimes missing.
193- Particular in recursion grammar situation. We have to copy it manually.
194-
195- Add this function in a p_xxx function when:
196- 1. the p[0] is gona to be used in another parsing target.
197- 2. and the tracking information is gona to be used there.
193+ """
194+ Ply's position tracking works by default only for lexed tokens (not for all grammar symbols).
195+ We either enable parse(tracking=True), or copy the tracking info manually when needed.
198196 """
199197 p .set_lexpos (to , p .lexpos (from_ ))
200198 p .set_lineno (to , p .lineno (from_ ))
@@ -213,13 +211,20 @@ def p_open_global_scope(self, p: P) -> None:
213211 filepath = self .current_filepath (),
214212 _bound = None ,
215213 scope_stack = self .current_scope_stack (),
214+ scope_start_lineno = 1 ,
215+ scope_start_col = 1 ,
216216 )
217217 self .push_scope (proto )
218218
219219 @override_docstring (r_close_global_scope )
220220 def p_close_global_scope (self , p : P ) -> None :
221221 scope = self .pop_scope ()
222222 proto = cast_or_raise (Proto , scope )
223+
224+ proto .scope_end_lineno = p .lexer .lexdata .count ("\n " ) # FIXME: slow?
225+ lexpos = len (p .lexer .lexdata )
226+ proto .scope_end_col = lexpos - p .lexer .lexdata .rfind ("\n " , 0 , lexpos )
227+
223228 if not proto .name :
224229 raise ProtoNameUndefined (filepath = self .current_filepath ())
225230 proto .freeze ()
@@ -334,6 +339,7 @@ def p_option(self, p: P) -> None:
334339 filepath = self .current_filepath (),
335340 lineno = p .lineno (2 ),
336341 token = p [2 ],
342+ token_col_start = self ._get_col (p , 2 ),
337343 )
338344 self .current_scope ().push_member (option )
339345
@@ -357,6 +363,7 @@ def p_alias(self, p: P) -> None:
357363 filepath = self .current_filepath (),
358364 lineno = lineno ,
359365 token = token ,
366+ token_col_start = self ._get_col (p , 2 ) if len (p ) == 6 else self ._get_col (p , 3 ),
360367 indent = self .current_indent (p ),
361368 scope_stack = self .current_scope_stack (),
362369 comment_block = self .collect_comment_block (),
@@ -384,6 +391,7 @@ def p_const(self, p: P) -> None:
384391 _bound = self .current_proto (),
385392 filepath = self .current_filepath (),
386393 token = p [2 ],
394+ token_col_start = self ._get_col (p , 2 ),
387395 lineno = p .lineno (2 ),
388396 )
389397 self .current_scope ().push_member (constant )
@@ -466,6 +474,15 @@ def p_constant_reference(self, p: P) -> None:
466474 p [0 ] = d
467475 self .copy_p_tracking (p )
468476
477+ reference = Reference (
478+ token = p [1 ],
479+ lineno = p .lineno (1 ),
480+ token_col_start = self ._get_col (p , 1 ),
481+ filepath = self .current_filepath (),
482+ referenced_definition = d ,
483+ )
484+ self .current_proto ().references .append (reference )
485+
469486 @override_docstring (r_type )
470487 def p_type (self , p : P ) -> None :
471488 p [0 ] = p [1 ]
@@ -498,9 +515,19 @@ def p_type_reference(self, p: P) -> None:
498515 token = p [1 ],
499516 lineno = p .lineno (1 ),
500517 )
518+
501519 p [0 ] = d
502520 self .copy_p_tracking (p )
503521
522+ reference = Reference (
523+ token = p [1 ],
524+ lineno = p .lineno (1 ),
525+ token_col_start = self ._get_col (p , 1 ),
526+ filepath = self .current_filepath (),
527+ referenced_definition = d ,
528+ )
529+ self .current_proto ().references .append (reference )
530+
504531 @override_docstring (r_optional_extensible_flag )
505532 def p_optional_extensible_flag (self , p : P ) -> None :
506533 extensible = len (p ) == 2
@@ -517,6 +544,7 @@ def p_array_type(self, p: P) -> None:
517544 cap = p [3 ],
518545 extensible = p [5 ],
519546 token = "{0}[{1}]" .format (p [1 ], p [3 ]),
547+ token_col_start = self ._get_col (p , 1 ),
520548 lineno = p .lineno (2 ),
521549 filepath = self .current_filepath (),
522550 )
@@ -552,12 +580,15 @@ def p_open_enum_scope(self, p: P) -> None:
552580 name = p [2 ],
553581 type = p [4 ],
554582 token = p [2 ],
583+ token_col_start = self ._get_col (p , 2 ),
555584 lineno = p .lineno (2 ),
556585 filepath = self .current_filepath (),
557586 indent = self .current_indent (p ),
558587 comment_block = self .collect_comment_block (),
559588 scope_stack = self .current_scope_stack (),
560589 _bound = self .current_proto (),
590+ scope_start_lineno = p .lineno (5 ), # '{'
591+ scope_start_col = self ._get_col (p , 5 ), # '{'
561592 )
562593 self .push_scope (enum )
563594
@@ -567,7 +598,10 @@ def p_enum_scope(self, p: P) -> None:
567598
@override_docstring(r_close_enum_scope)
def p_close_enum_scope(self, p: P) -> None:
    # Close the scope opened for the enum: pop it from the scope stack,
    # record where the scope ends (position of symbol 1 of this rule),
    # then freeze it.
    closing_scope = self.pop_scope()
    end_col = self._get_col(p, 1)
    end_lineno = p.lineno(1)
    closing_scope.scope_end_lineno = end_lineno
    closing_scope.scope_end_col = end_col
    # Freeze last, after the end position has been recorded.
    closing_scope.freeze()
571605
572606 @override_docstring (r_enum_items )
573607 def p_enum_items (self , p : P ) -> None :
@@ -605,6 +639,7 @@ def p_enum_field(self, p: P) -> None:
605639 name = name ,
606640 value = value ,
607641 token = p [1 ],
642+ token_col_start = self ._get_col (p , 1 ),
608643 lineno = p .lineno (1 ),
609644 indent = self .current_indent (p ),
610645 filepath = self .current_filepath (),
@@ -626,18 +661,24 @@ def p_open_message_scope(self, p: P) -> None:
626661 name = p [2 ],
627662 extensible = p [3 ],
628663 token = p [2 ],
664+ token_col_start = self ._get_col (p , 2 ),
629665 lineno = p .lineno (2 ),
630666 filepath = self .current_filepath (),
631667 indent = self .current_indent (p ),
632668 comment_block = self .collect_comment_block (),
633669 scope_stack = self .current_scope_stack (),
634670 _bound = self .current_proto (),
671+ scope_start_lineno = p .lineno (4 ), # '{'
672+ scope_start_col = self ._get_col (p , 4 ), # '{'
635673 )
636674 self .push_scope (message )
637675
@override_docstring(r_close_message_scope)
def p_close_message_scope(self, p: P) -> None:
    # Close the scope opened for the message: pop it from the scope stack,
    # record the position of the closing '}' (symbol 1 of this rule),
    # then freeze it.
    closing_scope = self.pop_scope()
    end_col = self._get_col(p, 1)  # column of '}'
    end_lineno = p.lineno(1)  # line of '}'
    closing_scope.scope_end_lineno = end_lineno
    closing_scope.scope_end_col = end_col
    # Freeze last, after the end position has been recorded.
    closing_scope.freeze()
641682
642683 @override_docstring (r_message_scope )
643684 def p_message_scope (self , p : P ) -> None :
@@ -673,6 +714,7 @@ def p_message_field(self, p: P) -> None:
673714 type = type ,
674715 number = field_number ,
675716 token = p [2 ],
717+ token_col_start = self ._get_col (p , 2 ),
676718 lineno = p .lineno (2 ),
677719 filepath = self .current_filepath (),
678720 comment_block = self .collect_comment_block (),
@@ -685,6 +727,7 @@ def p_message_field(self, p: P) -> None:
@override_docstring(r_message_field_name)
def p_message_field_name(self, p: P) -> None:
    # The rule's value is simply the value of its first symbol.
    p[0] = p[1]
    # Propagate lexpos/lineno from symbol 1 to the rule result (slot 0) so
    # that rules consuming this one can still read position information.
    self.copy_p_tracking(p, from_=1, to=0)
688731
689732 @override_docstring (r_boolean_literal )
690733 def p_boolean_literal (self , p : P ) -> None :
@@ -700,7 +743,7 @@ def p_string_literal(self, p: P) -> None:
700743
701744 @override_docstring (r_dotted_identifier )
702745 def p_dotted_identifier (self , p : P ) -> None :
703- self .copy_p_tracking (p )
746+ self .copy_p_tracking (p ) # from 1 => 0
704747 if len (p ) == 4 :
705748 p [0 ] = "." .join ([p [1 ], p [3 ]])
706749 elif len (p ) == 2 :
@@ -716,6 +759,13 @@ def p_error(self, p: P) -> None:
716759 raise GrammarError (filepath = filepath , token = p .value (1 ), lineno = p .lineno (1 ))
717760 raise GrammarError ()
718761
762+ def _get_col (self , p : P , k : int ) -> int :
763+ lexpos = p .lexpos (k )
764+ # we dont use `last_newline_pos` here,
765+ # because the recursive parsing may result a deeper `last_newline_pos`.
766+ last_newline = p .lexer .lexdata .rfind ("\n " , 0 , lexpos )
767+ return lexpos - max (last_newline , 0 )
768+
719769
720770def parse (filepath : str , traditional_mode : bool = False ) -> Proto :
721771 """Parse a bitproto from given filepath.
@@ -726,3 +776,14 @@ def parse(filepath: str, traditional_mode: bool = False) -> Proto:
726776 extensible grammar is used in traditional mode.
727777 """
728778 return Parser (traditional_mode = traditional_mode ).parse (filepath )
779+
780+
def parse_string(
    content: str, traditional_mode: bool = False, filepath: str = ""
) -> Proto:
    """Parse a bitproto from an in-memory string.

    :param content: The bitproto source text to parse.
    :param traditional_mode: Forwarded to the ``Parser`` constructor.
    :param filepath: Forwarded to ``Parser.parse_string``; defaults to an
        empty string. Presumably the path reported for this content
        (verify against ``Parser.parse_string``).
    :return: The result of ``Parser.parse_string`` for the given content.
    """
    parser = Parser(traditional_mode=traditional_mode)
    return parser.parse_string(content, filepath=filepath)
0 commit comments