@@ -9,8 +9,8 @@ use red_knot_python_semantic::Db;
99use ruff_db:: files:: FileRange ;
1010use ruff_db:: source:: { line_index, source_text} ;
1111use ruff_notebook:: NotebookIndex ;
12- use ruff_source_file:: OneIndexed ;
13- use ruff_source_file:: { LineIndex , SourceLocation } ;
12+ use ruff_source_file:: LineIndex ;
13+ use ruff_source_file:: { OneIndexed , SourceLocation } ;
1414use ruff_text_size:: { Ranged , TextRange , TextSize } ;
1515
1616#[ expect( dead_code) ]
@@ -46,7 +46,7 @@ impl TextSizeExt for TextSize {
4646 index : & LineIndex ,
4747 encoding : PositionEncoding ,
4848 ) -> types:: Position {
49- let source_location = offset_to_source_location ( self , text, index , encoding) ;
49+ let source_location = index . source_location ( self , text, encoding. into ( ) ) ;
5050 source_location_to_position ( & source_location)
5151 }
5252}
@@ -75,36 +75,14 @@ fn u32_index_to_usize(index: u32) -> usize {
7575
7676impl PositionExt for lsp_types:: Position {
7777 fn to_text_size ( & self , text : & str , index : & LineIndex , encoding : PositionEncoding ) -> TextSize {
78- let start_line = index. line_range (
79- OneIndexed :: from_zero_indexed ( u32_index_to_usize ( self . line ) ) ,
78+ index. offset (
79+ SourceLocation {
80+ line : OneIndexed :: from_zero_indexed ( u32_index_to_usize ( self . line ) ) ,
81+ character_offset : OneIndexed :: from_zero_indexed ( u32_index_to_usize ( self . character ) ) ,
82+ } ,
8083 text,
81- ) ;
82-
83- let start_column_offset = match encoding {
84- PositionEncoding :: UTF8 => TextSize :: new ( self . character ) ,
85-
86- PositionEncoding :: UTF16 => {
87- // Fast path for ASCII only documents
88- if index. is_ascii ( ) {
89- TextSize :: new ( self . character )
90- } else {
91- // UTF16 encodes characters either as one or two 16 bit words.
92- // The position in `range` is the 16-bit word offset from the start of the line (and not the character offset)
93- // UTF-16 with a text that may use variable-length characters.
94- utf8_column_offset ( self . character , & text[ start_line] )
95- }
96- }
97- PositionEncoding :: UTF32 => {
98- // UTF-32 uses 4 bytes for each character. Meaning, the position in range is a character offset.
99- return index. offset (
100- OneIndexed :: from_zero_indexed ( u32_index_to_usize ( self . line ) ) ,
101- OneIndexed :: from_zero_indexed ( u32_index_to_usize ( self . character ) ) ,
102- text,
103- ) ;
104- }
105- } ;
106-
107- start_line. start ( ) + start_column_offset. clamp ( TextSize :: new ( 0 ) , start_line. end ( ) )
84+ encoding. into ( ) ,
85+ )
10886 }
10987}
11088
@@ -142,26 +120,23 @@ impl ToRangeExt for TextRange {
142120 notebook_index : & NotebookIndex ,
143121 encoding : PositionEncoding ,
144122 ) -> NotebookRange {
145- let start = offset_to_source_location ( self . start ( ) , text, source_index , encoding) ;
146- let mut end = offset_to_source_location ( self . end ( ) , text, source_index , encoding) ;
147- let starting_cell = notebook_index. cell ( start. row ) ;
123+ let start = source_index . source_location ( self . start ( ) , text, encoding. into ( ) ) ;
124+ let mut end = source_index . source_location ( self . end ( ) , text, encoding. into ( ) ) ;
125+ let starting_cell = notebook_index. cell ( start. line ) ;
148126
149127 // weird edge case here - if the end of the range is where the newline after the cell got added (making it 'out of bounds')
150128 // we need to move it one character back (which should place it at the end of the last line).
151129 // we test this by checking if the ending offset is in a different (or nonexistent) cell compared to the cell of the starting offset.
152- if notebook_index. cell ( end. row ) != starting_cell {
153- end. row = end. row . saturating_sub ( 1 ) ;
154- end. column = offset_to_source_location (
155- self . end ( ) . checked_sub ( 1 . into ( ) ) . unwrap_or_default ( ) ,
156- text,
157- source_index,
158- encoding,
159- )
160- . column ;
130+ if notebook_index. cell ( end. line ) != starting_cell {
131+ end. line = end. line . saturating_sub ( 1 ) ;
132+ let offset = self . end ( ) . checked_sub ( 1 . into ( ) ) . unwrap_or_default ( ) ;
133+ end. character_offset = source_index
134+ . source_location ( offset, text, encoding. into ( ) )
135+ . character_offset ;
161136 }
162137
163- let start = source_location_to_position ( & notebook_index. translate_location ( & start) ) ;
164- let end = source_location_to_position ( & notebook_index. translate_location ( & end) ) ;
138+ let start = source_location_to_position ( & notebook_index. translate_source_location ( & start) ) ;
139+ let end = source_location_to_position ( & notebook_index. translate_source_location ( & end) ) ;
165140
166141 NotebookRange {
167142 cell : starting_cell
@@ -172,67 +147,10 @@ impl ToRangeExt for TextRange {
172147 }
173148}
174149
175- /// Converts a UTF-16 code unit offset for a given line into a UTF-8 column number.
176- fn utf8_column_offset ( utf16_code_unit_offset : u32 , line : & str ) -> TextSize {
177- let mut utf8_code_unit_offset = TextSize :: new ( 0 ) ;
178-
179- let mut i = 0u32 ;
180-
181- for c in line. chars ( ) {
182- if i >= utf16_code_unit_offset {
183- break ;
184- }
185-
186- // Count characters encoded as two 16 bit words as 2 characters.
187- {
188- utf8_code_unit_offset +=
189- TextSize :: new ( u32:: try_from ( c. len_utf8 ( ) ) . expect ( "utf8 len always <=4" ) ) ;
190- i += u32:: try_from ( c. len_utf16 ( ) ) . expect ( "utf16 len always <=2" ) ;
191- }
192- }
193-
194- utf8_code_unit_offset
195- }
196-
197- fn offset_to_source_location (
198- offset : TextSize ,
199- text : & str ,
200- index : & LineIndex ,
201- encoding : PositionEncoding ,
202- ) -> SourceLocation {
203- match encoding {
204- PositionEncoding :: UTF8 => {
205- let row = index. line_index ( offset) ;
206- let column = offset - index. line_start ( row, text) ;
207-
208- SourceLocation {
209- column : OneIndexed :: from_zero_indexed ( column. to_usize ( ) ) ,
210- row,
211- }
212- }
213- PositionEncoding :: UTF16 => {
214- let row = index. line_index ( offset) ;
215-
216- let column = if index. is_ascii ( ) {
217- ( offset - index. line_start ( row, text) ) . to_usize ( )
218- } else {
219- let up_to_line = & text[ TextRange :: new ( index. line_start ( row, text) , offset) ] ;
220- up_to_line. encode_utf16 ( ) . count ( )
221- } ;
222-
223- SourceLocation {
224- column : OneIndexed :: from_zero_indexed ( column) ,
225- row,
226- }
227- }
228- PositionEncoding :: UTF32 => index. source_location ( offset, text) ,
229- }
230- }
231-
232150fn source_location_to_position ( location : & SourceLocation ) -> types:: Position {
233151 types:: Position {
234- line : u32:: try_from ( location. row . to_zero_indexed ( ) ) . expect ( "row usize fits in u32" ) ,
235- character : u32:: try_from ( location. column . to_zero_indexed ( ) )
152+ line : u32:: try_from ( location. line . to_zero_indexed ( ) ) . expect ( "line usize fits in u32" ) ,
153+ character : u32:: try_from ( location. character_offset . to_zero_indexed ( ) )
236154 . expect ( "character usize fits in u32" ) ,
237155 }
238156}
0 commit comments