Skip to content

Commit 1c65e0a

Browse files
authored
Split SourceLocation into LineColumn and SourceLocation (astral-sh#17587)
1 parent 4443f66 commit 1c65e0a

File tree

29 files changed

+696
-538
lines changed

29 files changed

+696
-538
lines changed

crates/red_knot_server/src/document.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,16 @@ pub enum PositionEncoding {
2727
UTF8,
2828
}
2929

30+
impl From<PositionEncoding> for ruff_source_file::PositionEncoding {
31+
fn from(value: PositionEncoding) -> Self {
32+
match value {
33+
PositionEncoding::UTF8 => Self::Utf8,
34+
PositionEncoding::UTF16 => Self::Utf16,
35+
PositionEncoding::UTF32 => Self::Utf32,
36+
}
37+
}
38+
}
39+
3040
/// A unique document ID, derived from a URL passed as part of an LSP request.
3141
/// This document ID can point to either be a standalone Python file, a full notebook, or a cell within a notebook.
3242
#[derive(Clone, Debug)]

crates/red_knot_server/src/document/range.rs

Lines changed: 23 additions & 105 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@ use red_knot_python_semantic::Db;
99
use ruff_db::files::FileRange;
1010
use ruff_db::source::{line_index, source_text};
1111
use ruff_notebook::NotebookIndex;
12-
use ruff_source_file::OneIndexed;
13-
use ruff_source_file::{LineIndex, SourceLocation};
12+
use ruff_source_file::LineIndex;
13+
use ruff_source_file::{OneIndexed, SourceLocation};
1414
use ruff_text_size::{Ranged, TextRange, TextSize};
1515

1616
#[expect(dead_code)]
@@ -46,7 +46,7 @@ impl TextSizeExt for TextSize {
4646
index: &LineIndex,
4747
encoding: PositionEncoding,
4848
) -> types::Position {
49-
let source_location = offset_to_source_location(self, text, index, encoding);
49+
let source_location = index.source_location(self, text, encoding.into());
5050
source_location_to_position(&source_location)
5151
}
5252
}
@@ -75,36 +75,14 @@ fn u32_index_to_usize(index: u32) -> usize {
7575

7676
impl PositionExt for lsp_types::Position {
7777
fn to_text_size(&self, text: &str, index: &LineIndex, encoding: PositionEncoding) -> TextSize {
78-
let start_line = index.line_range(
79-
OneIndexed::from_zero_indexed(u32_index_to_usize(self.line)),
78+
index.offset(
79+
SourceLocation {
80+
line: OneIndexed::from_zero_indexed(u32_index_to_usize(self.line)),
81+
character_offset: OneIndexed::from_zero_indexed(u32_index_to_usize(self.character)),
82+
},
8083
text,
81-
);
82-
83-
let start_column_offset = match encoding {
84-
PositionEncoding::UTF8 => TextSize::new(self.character),
85-
86-
PositionEncoding::UTF16 => {
87-
// Fast path for ASCII only documents
88-
if index.is_ascii() {
89-
TextSize::new(self.character)
90-
} else {
91-
// UTF16 encodes characters either as one or two 16 bit words.
92-
// The position in `range` is the 16-bit word offset from the start of the line (and not the character offset)
93-
// UTF-16 with a text that may use variable-length characters.
94-
utf8_column_offset(self.character, &text[start_line])
95-
}
96-
}
97-
PositionEncoding::UTF32 => {
98-
// UTF-32 uses 4 bytes for each character. Meaning, the position in range is a character offset.
99-
return index.offset(
100-
OneIndexed::from_zero_indexed(u32_index_to_usize(self.line)),
101-
OneIndexed::from_zero_indexed(u32_index_to_usize(self.character)),
102-
text,
103-
);
104-
}
105-
};
106-
107-
start_line.start() + start_column_offset.clamp(TextSize::new(0), start_line.end())
84+
encoding.into(),
85+
)
10886
}
10987
}
11088

@@ -142,26 +120,23 @@ impl ToRangeExt for TextRange {
142120
notebook_index: &NotebookIndex,
143121
encoding: PositionEncoding,
144122
) -> NotebookRange {
145-
let start = offset_to_source_location(self.start(), text, source_index, encoding);
146-
let mut end = offset_to_source_location(self.end(), text, source_index, encoding);
147-
let starting_cell = notebook_index.cell(start.row);
123+
let start = source_index.source_location(self.start(), text, encoding.into());
124+
let mut end = source_index.source_location(self.end(), text, encoding.into());
125+
let starting_cell = notebook_index.cell(start.line);
148126

149127
// weird edge case here - if the end of the range is where the newline after the cell got added (making it 'out of bounds')
150128
// we need to move it one character back (which should place it at the end of the last line).
151129
// we test this by checking if the ending offset is in a different (or nonexistent) cell compared to the cell of the starting offset.
152-
if notebook_index.cell(end.row) != starting_cell {
153-
end.row = end.row.saturating_sub(1);
154-
end.column = offset_to_source_location(
155-
self.end().checked_sub(1.into()).unwrap_or_default(),
156-
text,
157-
source_index,
158-
encoding,
159-
)
160-
.column;
130+
if notebook_index.cell(end.line) != starting_cell {
131+
end.line = end.line.saturating_sub(1);
132+
let offset = self.end().checked_sub(1.into()).unwrap_or_default();
133+
end.character_offset = source_index
134+
.source_location(offset, text, encoding.into())
135+
.character_offset;
161136
}
162137

163-
let start = source_location_to_position(&notebook_index.translate_location(&start));
164-
let end = source_location_to_position(&notebook_index.translate_location(&end));
138+
let start = source_location_to_position(&notebook_index.translate_source_location(&start));
139+
let end = source_location_to_position(&notebook_index.translate_source_location(&end));
165140

166141
NotebookRange {
167142
cell: starting_cell
@@ -172,67 +147,10 @@ impl ToRangeExt for TextRange {
172147
}
173148
}
174149

175-
/// Converts a UTF-16 code unit offset for a given line into a UTF-8 column number.
176-
fn utf8_column_offset(utf16_code_unit_offset: u32, line: &str) -> TextSize {
177-
let mut utf8_code_unit_offset = TextSize::new(0);
178-
179-
let mut i = 0u32;
180-
181-
for c in line.chars() {
182-
if i >= utf16_code_unit_offset {
183-
break;
184-
}
185-
186-
// Count characters encoded as two 16 bit words as 2 characters.
187-
{
188-
utf8_code_unit_offset +=
189-
TextSize::new(u32::try_from(c.len_utf8()).expect("utf8 len always <=4"));
190-
i += u32::try_from(c.len_utf16()).expect("utf16 len always <=2");
191-
}
192-
}
193-
194-
utf8_code_unit_offset
195-
}
196-
197-
fn offset_to_source_location(
198-
offset: TextSize,
199-
text: &str,
200-
index: &LineIndex,
201-
encoding: PositionEncoding,
202-
) -> SourceLocation {
203-
match encoding {
204-
PositionEncoding::UTF8 => {
205-
let row = index.line_index(offset);
206-
let column = offset - index.line_start(row, text);
207-
208-
SourceLocation {
209-
column: OneIndexed::from_zero_indexed(column.to_usize()),
210-
row,
211-
}
212-
}
213-
PositionEncoding::UTF16 => {
214-
let row = index.line_index(offset);
215-
216-
let column = if index.is_ascii() {
217-
(offset - index.line_start(row, text)).to_usize()
218-
} else {
219-
let up_to_line = &text[TextRange::new(index.line_start(row, text), offset)];
220-
up_to_line.encode_utf16().count()
221-
};
222-
223-
SourceLocation {
224-
column: OneIndexed::from_zero_indexed(column),
225-
row,
226-
}
227-
}
228-
PositionEncoding::UTF32 => index.source_location(offset, text),
229-
}
230-
}
231-
232150
fn source_location_to_position(location: &SourceLocation) -> types::Position {
233151
types::Position {
234-
line: u32::try_from(location.row.to_zero_indexed()).expect("row usize fits in u32"),
235-
character: u32::try_from(location.column.to_zero_indexed())
152+
line: u32::try_from(location.line.to_zero_indexed()).expect("line usize fits in u32"),
153+
character: u32::try_from(location.character_offset.to_zero_indexed())
236154
.expect("character usize fits in u32"),
237155
}
238156
}

crates/red_knot_test/src/matcher.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,7 @@ impl Matcher {
263263
.and_then(|span| span.range())
264264
.map(|range| {
265265
self.line_index
266-
.source_location(range.start(), &self.source)
266+
.line_column(range.start(), &self.source)
267267
.column
268268
})
269269
.unwrap_or(OneIndexed::from_zero_indexed(0))

crates/red_knot_wasm/src/lib.rs

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ use ruff_db::system::{
1919
use ruff_db::Upcast;
2020
use ruff_notebook::Notebook;
2121
use ruff_python_formatter::formatted_file;
22-
use ruff_source_file::{LineIndex, OneIndexed, SourceLocation};
22+
use ruff_source_file::{LineColumn, LineIndex, OneIndexed, PositionEncoding, SourceLocation};
2323
use ruff_text_size::{Ranged, TextSize};
2424
use wasm_bindgen::prelude::*;
2525

@@ -408,8 +408,8 @@ impl Range {
408408
}
409409
}
410410

411-
impl From<(SourceLocation, SourceLocation)> for Range {
412-
fn from((start, end): (SourceLocation, SourceLocation)) -> Self {
411+
impl From<(LineColumn, LineColumn)> for Range {
412+
fn from((start, end): (LineColumn, LineColumn)) -> Self {
413413
Self {
414414
start: start.into(),
415415
end: end.into(),
@@ -438,29 +438,34 @@ impl Position {
438438
impl Position {
439439
fn to_text_size(self, text: &str, index: &LineIndex) -> Result<TextSize, Error> {
440440
let text_size = index.offset(
441-
OneIndexed::new(self.line).ok_or_else(|| {
442-
Error::new("Invalid value `0` for `position.line`. The line index is 1-indexed.")
443-
})?,
444-
OneIndexed::new(self.column).ok_or_else(|| {
445-
Error::new(
446-
"Invalid value `0` for `position.column`. The column index is 1-indexed.",
447-
)
448-
})?,
441+
SourceLocation {
442+
line: OneIndexed::new(self.line).ok_or_else(|| {
443+
Error::new(
444+
"Invalid value `0` for `position.line`. The line index is 1-indexed.",
445+
)
446+
})?,
447+
character_offset: OneIndexed::new(self.column).ok_or_else(|| {
448+
Error::new(
449+
"Invalid value `0` for `position.column`. The column index is 1-indexed.",
450+
)
451+
})?,
452+
},
449453
text,
454+
PositionEncoding::Utf32,
450455
);
451456

452457
Ok(text_size)
453458
}
454459

455460
fn from_text_size(offset: TextSize, line_index: &LineIndex, source: &str) -> Self {
456-
line_index.source_location(offset, source).into()
461+
line_index.line_column(offset, source).into()
457462
}
458463
}
459464

460-
impl From<SourceLocation> for Position {
461-
fn from(location: SourceLocation) -> Self {
465+
impl From<LineColumn> for Position {
466+
fn from(location: LineColumn) -> Self {
462467
Self {
463-
line: location.row.get(),
468+
line: location.line.get(),
464469
column: location.column.get(),
465470
}
466471
}

crates/ruff/src/args.rs

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ use std::path::{Path, PathBuf};
55
use std::str::FromStr;
66
use std::sync::Arc;
77

8+
use crate::commands::completions::config::{OptionString, OptionStringParser};
89
use anyhow::bail;
910
use clap::builder::{TypedValueParser, ValueParserFactory};
1011
use clap::{command, Parser, Subcommand};
@@ -22,7 +23,7 @@ use ruff_linter::settings::types::{
2223
};
2324
use ruff_linter::{RuleParser, RuleSelector, RuleSelectorParser};
2425
use ruff_python_ast as ast;
25-
use ruff_source_file::{LineIndex, OneIndexed};
26+
use ruff_source_file::{LineIndex, OneIndexed, PositionEncoding};
2627
use ruff_text_size::TextRange;
2728
use ruff_workspace::configuration::{Configuration, RuleSelection};
2829
use ruff_workspace::options::{Options, PycodestyleOptions};
@@ -31,8 +32,6 @@ use ruff_workspace::resolver::ConfigurationTransformer;
3132
use rustc_hash::FxHashMap;
3233
use toml;
3334

34-
use crate::commands::completions::config::{OptionString, OptionStringParser};
35-
3635
/// All configuration options that can be passed "globally",
3736
/// i.e., can be passed to all subcommands
3837
#[derive(Debug, Default, Clone, clap::Args)]
@@ -1070,8 +1069,9 @@ impl FormatRange {
10701069
///
10711070
/// Returns an empty range if the start range is past the end of `source`.
10721071
pub(super) fn to_text_range(self, source: &str, line_index: &LineIndex) -> TextRange {
1073-
let start_byte_offset = line_index.offset(self.start.line, self.start.column, source);
1074-
let end_byte_offset = line_index.offset(self.end.line, self.end.column, source);
1072+
let start_byte_offset =
1073+
line_index.offset(self.start.into(), source, PositionEncoding::Utf32);
1074+
let end_byte_offset = line_index.offset(self.end.into(), source, PositionEncoding::Utf32);
10751075

10761076
TextRange::new(start_byte_offset, end_byte_offset)
10771077
}
@@ -1142,6 +1142,15 @@ pub struct LineColumn {
11421142
pub column: OneIndexed,
11431143
}
11441144

1145+
impl From<LineColumn> for ruff_source_file::SourceLocation {
1146+
fn from(value: LineColumn) -> Self {
1147+
Self {
1148+
line: value.line,
1149+
character_offset: value.column,
1150+
}
1151+
}
1152+
}
1153+
11451154
impl std::fmt::Display for LineColumn {
11461155
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
11471156
write!(f, "{line}:{column}", line = self.line, column = self.column)

crates/ruff_db/src/diagnostic/render.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,8 @@ impl std::fmt::Display for DisplayDiagnostic<'_> {
7171
write!(f, " {path}", path = self.resolver.path(span.file()))?;
7272
if let Some(range) = span.range() {
7373
let input = self.resolver.input(span.file());
74-
let start = input.as_source_code().source_location(range.start());
75-
write!(f, ":{line}:{col}", line = start.row, col = start.column)?;
74+
let start = input.as_source_code().line_column(range.start());
75+
write!(f, ":{line}:{col}", line = start.line, col = start.column)?;
7676
}
7777
write!(f, ":")?;
7878
}

crates/ruff_linter/src/checkers/ast/analyze/definitions.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@ pub(crate) fn definitions(checker: &mut Checker) {
191191
warn_user!(
192192
"Docstring at {}:{}:{} contains implicit string concatenation; ignoring...",
193193
relativize_path(checker.path),
194-
location.row,
194+
location.line,
195195
location.column
196196
);
197197
continue;

crates/ruff_linter/src/locator.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
33
use std::cell::OnceCell;
44

5-
use ruff_source_file::{LineIndex, LineRanges, OneIndexed, SourceCode, SourceLocation};
5+
use ruff_source_file::{LineColumn, LineIndex, LineRanges, OneIndexed, SourceCode};
66
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
77

88
#[derive(Debug)]
@@ -36,8 +36,8 @@ impl<'a> Locator<'a> {
3636
#[deprecated(
3737
note = "This is expensive, avoid using outside of the diagnostic phase. Prefer the other `Locator` methods instead."
3838
)]
39-
pub fn compute_source_location(&self, offset: TextSize) -> SourceLocation {
40-
self.to_source_code().source_location(offset)
39+
pub fn compute_source_location(&self, offset: TextSize) -> LineColumn {
40+
self.to_source_code().line_column(offset)
4141
}
4242

4343
pub fn to_index(&self) -> &LineIndex {

0 commit comments

Comments
 (0)