1- use std :: collections :: BTreeSet ;
1+ use memchr :: memchr_iter ;
22
33use crate :: comment_block:: CommentBlock ;
44use crate :: parser_state:: line_difference_requires_newline;
55use crate :: types:: { LineNumber , SourceOffset } ;
6- use crate :: util:: { u8_to_str , u8_to_string} ;
6+ use crate :: util:: u8_to_string;
77
88/// A vector of offsets in the source code where lines start, which
99/// we use to detect what line a given offset is on.
@@ -18,18 +18,7 @@ pub struct LineIndex {
1818}
1919
2020impl LineIndex {
21- pub fn new ( file_contents : & [ u8 ] ) -> Self {
22- let mut line_starts = Vec :: new ( ) ;
23-
24- // First line always starts at position 0
25- line_starts. push ( 0 ) ;
26-
27- for ( i, & byte) in file_contents. iter ( ) . enumerate ( ) {
28- if byte == b'\n' {
29- line_starts. push ( i + 1 ) ;
30- }
31- }
32-
21+ fn from_vec ( line_starts : Vec < usize > ) -> Self {
3322 LineIndex { line_starts }
3423 }
3524
@@ -63,7 +52,8 @@ pub struct FileComments {
6352 start_of_file_contiguous_comment_lines : Option < CommentBlock > ,
6453 /// A list of comments, sorted in order by `LineNumber`
6554 other_comments : Vec < ( LineNumber , String ) > ,
66- lines_with_ruby : BTreeSet < LineNumber > ,
55+ /// Sorted list of line numbers that contain Ruby code (not comments/blank)
56+ lines_with_ruby : Vec < LineNumber > ,
6757 last_lineno : LineNumber ,
6858 line_index : LineIndex ,
6959 /// Sorted list of byte offsets where comments start
@@ -72,7 +62,36 @@ pub struct FileComments {
7262
7363impl FileComments {
7464 pub fn from_prism_comments ( comments : ruby_prism:: Comments , source : & [ u8 ] ) -> FileComments {
75- let line_index = LineIndex :: new ( source) ;
65+ let mut line_starts = Vec :: new ( ) ;
66+ let mut lines_with_ruby = Vec :: new ( ) ;
67+
68+ line_starts. push ( 0 ) ; // First line always starts at position 0
69+
70+ let mut line_start = 0 ;
71+ let mut lineno = 1 ;
72+ let mut inside_embdoc = false ;
73+
74+ for i in memchr_iter ( b'\n' , source) {
75+ line_starts. push ( i + 1 ) ;
76+
77+ if Self :: line_has_ruby ( & source[ line_start..i] , & mut inside_embdoc) {
78+ lines_with_ruby. push ( lineno) ;
79+ }
80+
81+ line_start = i + 1 ;
82+ lineno += 1 ;
83+ }
84+
85+ // Handle last line if no trailing newline
86+ if line_start < source. len ( ) {
87+ let line = & source[ line_start..] ;
88+ if Self :: line_has_ruby ( line, & mut inside_embdoc) {
89+ lines_with_ruby. push ( lineno) ;
90+ }
91+ }
92+
93+ let line_index = LineIndex :: from_vec ( line_starts) ;
94+
7695 let mut file_comments = FileComments :: default ( ) ;
7796 for comment in comments {
7897 file_comments. push_comment (
@@ -84,39 +103,36 @@ impl FileComments {
84103 . push ( comment. location ( ) . start_offset ( ) ) ;
85104 }
86105
87- // Lookup lines that have any Ruby
88- let mut inside_embdoc = false ;
89- u8_to_str ( source)
90- . lines ( )
91- . enumerate ( )
92- . filter ( |( _lineno, line_contents) | {
93- let contents = line_contents. trim ( ) ;
94- if contents. starts_with ( "=begin" ) {
95- inside_embdoc = true ;
96- return false ;
97- }
98- if contents. starts_with ( "=end" ) {
99- inside_embdoc = false ;
100- return false ;
101- }
102- if inside_embdoc {
103- return false ;
104- }
105- !( contents. starts_with ( "#" ) || contents. is_empty ( ) )
106- } )
107- . for_each ( |( lineno, _) | {
108- file_comments
109- . lines_with_ruby
110- // Insert as one-offset to work with Ripper.
111- // This (and elsewhere) can be zero-offset once Ripper is removed
112- . insert ( ( lineno + 1 ) as u64 ) ;
113- } ) ;
114-
106+ file_comments. lines_with_ruby = lines_with_ruby;
115107 file_comments. last_lineno = line_index. line_starts . len ( ) as u64 ;
116108 file_comments. line_index = line_index;
117109 file_comments
118110 }
119111
/// Decides whether a single source line counts as containing Ruby code.
///
/// `line` is the raw bytes of one line; `inside_embdoc` carries the
/// `=begin` / `=end` state from one call to the next and is updated here.
///
/// Returns `false` for lines that are empty or all ASCII whitespace, for
/// lines whose first non-whitespace bytes are `=begin` or `=end`, for any
/// line seen while `inside_embdoc` is set, and for lines whose first
/// non-whitespace byte is `#`. Everything else returns `true`.
fn line_has_ruby(line: &[u8], inside_embdoc: &mut bool) -> bool {
    // Drop leading ASCII whitespace; a line with nothing else is blank.
    let content = match line.iter().position(|byte| !byte.is_ascii_whitespace()) {
        Some(start) => &line[start..],
        None => return false,
    };

    // The two markers cannot both prefix the same line, so a single flag
    // update covers both: `=begin` switches the state on, `=end` off.
    let opens_embdoc = content.starts_with(b"=begin");
    let closes_embdoc = content.starts_with(b"=end");
    if opens_embdoc || closes_embdoc {
        *inside_embdoc = opens_embdoc;
        return false;
    }

    // `content` is non-empty here, so checking the `#` prefix is the same
    // as inspecting its first byte.
    !*inside_embdoc && !content.starts_with(b"#")
}
135+
120136 pub fn still_in_file ( & self , line_number : LineNumber ) -> bool {
121137 line_number < self . last_lineno
122138 }
@@ -162,7 +178,7 @@ impl FileComments {
162178 }
163179
164180 pub fn is_empty_line ( & self , line_number : LineNumber ) -> bool {
165- ! self . lines_with_ruby . contains ( & line_number)
181+ self . lines_with_ruby . binary_search ( & line_number) . is_err ( )
166182 }
167183
168184 pub fn take_start_of_file_contiguous_comment_lines ( & mut self ) -> Option < CommentBlock > {
0 commit comments