Skip to content

Commit 7bc5843

Browse files
GearsDatapacks, lpil
authored and committed
Document
1 parent 349fb98 commit 7bc5843

File tree

6 files changed

+62
-29
lines changed

6 files changed

+62
-29
lines changed

compiler-core/src/language_server.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -53,8 +53,8 @@ pub fn src_span_to_lsp_range(location: SrcSpan, line_numbers: &LineNumbers) -> R
5353

5454
pub fn lsp_range_to_src_span(range: Range, line_numbers: &LineNumbers) -> SrcSpan {
5555
let Range { start, end } = range;
56-
let start = line_numbers.byte_index(start.line, start.character);
57-
let end = line_numbers.byte_index(end.line, end.character);
56+
let start = line_numbers.byte_index(start.line + 1, start.character + 1);
57+
let end = line_numbers.byte_index(end.line + 1, end.character + 1);
5858
SrcSpan { start, end }
5959
}
6060

compiler-core/src/language_server/completer.rs

Lines changed: 10 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -130,9 +130,10 @@ where
130130
&'a self,
131131
valid_phrase_char: &impl Fn(char) -> bool,
132132
) -> (Range, String) {
133-
let cursor = self
134-
.src_line_numbers
135-
.byte_index(self.cursor_position.line, self.cursor_position.character);
133+
let cursor = self.src_line_numbers.byte_index(
134+
self.cursor_position.line + 1,
135+
self.cursor_position.character + 1,
136+
);
136137

137138
// Get part of phrase prior to cursor
138139
let before = self
@@ -195,10 +196,10 @@ where
195196
pub fn import_completions(&'a self) -> Option<Result<Option<Vec<CompletionItem>>>> {
196197
let start_of_line = self
197198
.src_line_numbers
198-
.byte_index(self.cursor_position.line, 0);
199+
.byte_index(self.cursor_position.line + 1, 1);
199200
let end_of_line = self
200201
.src_line_numbers
201-
.byte_index(self.cursor_position.line + 1, 0);
202+
.byte_index(self.cursor_position.line + 2, 1);
202203

203204
// Drop all lines except the line the cursor is on
204205
let src = self.src.get(start_of_line as usize..end_of_line as usize)?;
@@ -561,9 +562,10 @@ where
561562
// e.x. when the user has typed mymodule.| we know local module and prelude values are no longer
562563
// relevant.
563564
if module_select.is_none() {
564-
let cursor = self
565-
.src_line_numbers
566-
.byte_index(self.cursor_position.line, self.cursor_position.character);
565+
let cursor = self.src_line_numbers.byte_index(
566+
self.cursor_position.line + 1,
567+
self.cursor_position.character + 1,
568+
);
567569

568570
// Find the function that the cursor is in and push completions for
569571
// its arguments and local variables.

compiler-core/src/language_server/edits.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@ pub fn add_newlines_after_import(
4747
src: &str,
4848
) -> Newlines {
4949
let import_start_cursor =
50-
line_numbers.byte_index(import_location.line, import_location.character);
50+
line_numbers.byte_index(import_location.line + 1, import_location.character + 1);
5151
let is_new_line = src
5252
.chars()
5353
.nth(import_start_cursor as usize)

compiler-core/src/language_server/engine.rs

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -266,7 +266,7 @@ where
266266
let completer = Completer::new(&src, &params, &this.compiler, module);
267267
let byte_index = completer
268268
.module_line_numbers
269-
.byte_index(params.position.line, params.position.character);
269+
.byte_index(params.position.line + 1, params.position.character + 1);
270270

271271
// If in comment context, do not provide completions
272272
if module.extra.is_within_comment(byte_index) {
@@ -629,7 +629,8 @@ where
629629
)))
630630
};
631631

632-
let byte_index = lines.byte_index(params.position.line, params.position.character);
632+
let byte_index =
633+
lines.byte_index(params.position.line + 1, params.position.character + 1);
633634

634635
Ok(match reference_for_ast_node(found, &current_module.name) {
635636
Some(Referenced::LocalVariable {
@@ -762,7 +763,8 @@ where
762763
return Ok(None);
763764
};
764765

765-
let byte_index = lines.byte_index(position.position.line, position.position.character);
766+
let byte_index =
767+
lines.byte_index(position.position.line + 1, position.position.character + 1);
766768

767769
Ok(match reference_for_ast_node(found, &module.name) {
768770
Some(Referenced::LocalVariable {
@@ -996,7 +998,8 @@ Unused labelled fields:
996998
module: &'a Module,
997999
) -> Option<(LineNumbers, Located<'a>)> {
9981000
let line_numbers = LineNumbers::new(&module.code);
999-
let byte_index = line_numbers.byte_index(params.position.line, params.position.character);
1001+
let byte_index =
1002+
line_numbers.byte_index(params.position.line + 1, params.position.character + 1);
10001003
let node = module.find_node(byte_index);
10011004
let node = node?;
10021005
Some((line_numbers, node))

compiler-core/src/language_server/tests.rs

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -831,11 +831,13 @@ pub fn apply_code_edit(src: &str, mut change: Vec<lsp_types::TextEdit>) -> Strin
831831

832832
change.sort_by_key(|edit| (edit.range.start.line, edit.range.start.character));
833833
for edit in change {
834-
let start = line_numbers.byte_index(edit.range.start.line, edit.range.start.character)
834+
let start = line_numbers
835+
.byte_index(edit.range.start.line + 1, edit.range.start.character + 1)
836+
as i32
837+
- offset;
838+
let end = line_numbers.byte_index(edit.range.end.line + 1, edit.range.end.character + 1)
835839
as i32
836840
- offset;
837-
let end =
838-
line_numbers.byte_index(edit.range.end.line, edit.range.end.character) as i32 - offset;
839841
let range = (start as usize)..(end as usize);
840842
offset += end - start;
841843
offset -= edit.new_text.len() as i32;

compiler-core/src/line_numbers.rs

Lines changed: 37 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,38 @@
11
use std::collections::HashMap;
22

3+
/// A struct which contains information about line numbers of a source file,
4+
/// and can convert between byte offsets that are used in the compiler and
5+
/// line-column pairs used in LSP.
36
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq)]
47
pub struct LineNumbers {
8+
/// The byte offsets of the start of each line of the source file
59
pub line_starts: Vec<u32>,
10+
/// The total length of the source file
611
pub length: u32,
12+
/// A mapping of byte offsets to character length information. This is used
13+
/// when converting between byte indices and line-column numbers, because
14+
/// LSP uses UTF-16, while Rust encodes strings as UTF-8.
15+
///
16+
/// This only contains characters which are more than one byte in UTF-8,
17+
/// because one byte UTF-8 characters are one UTF-16 segment also, so no
18+
/// translation is needed.
19+
///
20+
/// We could store the whole source file here instead, however that would
21+
/// be quite wasteful. Most Gleam programs use only ASCII characters, meaning
22+
/// UTF-8 offsets are the same as UTF-16 ones. With this representation, we
23+
/// only need to store a few characters.
24+
///
25+
/// In most programs this will be empty because they will only be using
26+
/// ASCII characters.
727
pub mapping: HashMap<usize, Character>,
828
}
929

30+
/// Information about how a character is encoded in UTF-8 and UTF-16.
1031
#[derive(Debug, Clone, Copy, serde::Serialize, serde::Deserialize, PartialEq, Eq)]
1132
pub struct Character {
33+
/// The number of bytes needed to encode this in UTF-8.
1234
pub length_utf8: u8,
35+
/// The number of 16-bit segments needed to encode this in UTF-16.
1336
pub length_utf16: u8,
1437
}
1538

@@ -43,14 +66,16 @@ impl LineNumbers {
4366
map
4467
}
4568

46-
/// Get the line number for a byte index
69+
/// Returns the 1-indexed line number of a given byte index
4770
pub fn line_number(&self, byte_index: u32) -> u32 {
4871
self.line_starts
4972
.binary_search(&byte_index)
5073
.unwrap_or_else(|next_line| next_line - 1) as u32
5174
+ 1
5275
}
5376

77+
/// Returns the 1-indexed line and column number of a given byte index,
78+
/// using a UTF-16 character offset.
5479
pub fn line_and_column_number(&self, byte_index: u32) -> LineColumn {
5580
let line = self.line_number(byte_index);
5681
let line_start = self
@@ -82,9 +107,10 @@ impl LineNumbers {
82107
}
83108
}
84109

85-
/// 0 indexed line and character to byte index
110+
/// Returns the byte index of the corresponding 1-indexed line and column
111+
/// numbers, translating from a UTF-16 character index to a UTF-8 byte index.
86112
pub fn byte_index(&self, line: u32, character: u32) -> u32 {
87-
let line_start = match self.line_starts.get(line as usize) {
113+
let line_start = match self.line_starts.get(line as usize - 1) {
88114
Some(&line_start) => line_start,
89115
None => return self.length,
90116
};
@@ -93,7 +119,7 @@ impl LineNumbers {
93119
let mut u16_offset = 0;
94120

95121
loop {
96-
if u16_offset >= character {
122+
if u16_offset >= character - 1 {
97123
break;
98124
}
99125

@@ -120,10 +146,10 @@ pub fn main() {
120146
"#;
121147
let line_numbers = LineNumbers::new(src);
122148

123-
assert_eq!(line_numbers.byte_index(0, 0), 0);
124-
assert_eq!(line_numbers.byte_index(0, 4), 4);
149+
assert_eq!(line_numbers.byte_index(1, 1), 0);
150+
assert_eq!(line_numbers.byte_index(1, 5), 4);
125151
assert_eq!(line_numbers.byte_index(100, 1), src.len() as u32);
126-
assert_eq!(line_numbers.byte_index(2, 1), 18);
152+
assert_eq!(line_numbers.byte_index(3, 2), 18);
127153
}
128154

129155
// https://github.com/gleam-lang/gleam/issues/3628
@@ -139,10 +165,10 @@ pub fn main() {
139165
"#;
140166
let line_numbers = LineNumbers::new(src);
141167

142-
assert_eq!(line_numbers.byte_index(1, 6), 30);
143-
assert_eq!(line_numbers.byte_index(5, 2), 52);
144-
assert_eq!(line_numbers.byte_index(5, 17), 75);
145-
assert_eq!(line_numbers.byte_index(6, 1), 91);
168+
assert_eq!(line_numbers.byte_index(2, 7), 30);
169+
assert_eq!(line_numbers.byte_index(6, 3), 52);
170+
assert_eq!(line_numbers.byte_index(6, 18), 75);
171+
assert_eq!(line_numbers.byte_index(7, 2), 91);
146172
}
147173

148174
// https://github.com/gleam-lang/gleam/issues/3628

0 commit comments

Comments
 (0)