Skip to content

Commit 12671eb

Browse files
Record parser errors in NodeList to preserve source fidelity (#239)
1 parent 6c77d91 commit 12671eb

File tree

45 files changed

+1641
-623
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

45 files changed

+1641
-623
lines changed

crates/djls-ide/src/diagnostics.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,13 @@ impl DiagnosticError for ValidationError {
3333
fn span(&self) -> Option<(u32, u32)> {
3434
match self {
3535
ValidationError::UnbalancedStructure { opening_span, .. } => {
36-
Some((opening_span.start, opening_span.length))
36+
Some(opening_span.as_tuple())
3737
}
3838
ValidationError::UnclosedTag { span, .. }
3939
| ValidationError::OrphanedTag { span, .. }
4040
| ValidationError::UnmatchedBlockName { span, .. }
4141
| ValidationError::MissingRequiredArguments { span, .. }
42-
| ValidationError::TooManyArguments { span, .. } => Some((span.start, span.length)),
42+
| ValidationError::TooManyArguments { span, .. } => Some(span.as_tuple()),
4343
}
4444
}
4545

crates/djls-source/src/position.rs

Lines changed: 91 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,28 +2,53 @@ use serde::Serialize;
22

33
/// A byte offset within a text document.
44
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
5-
pub struct ByteOffset(pub u32);
5+
pub struct ByteOffset(u32);
6+
7+
impl ByteOffset {
8+
#[must_use]
9+
pub fn new(offset: u32) -> Self {
10+
Self(offset)
11+
}
12+
13+
#[must_use]
14+
pub fn from_usize(offset: usize) -> Self {
15+
Self(u32::try_from(offset).unwrap_or(u32::MAX))
16+
}
17+
18+
#[must_use]
19+
pub fn offset(&self) -> u32 {
20+
self.0
21+
}
22+
}
623

724
/// A line and column position within a text document.
825
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
9-
pub struct LineCol(pub (u32, u32));
26+
// Both fields are private: values are set through `LineCol::new` and read
// back through the `line()` / `column()` accessors.
pub struct LineCol {
27+
// Line index; `LineIndex::to_line_col` reports the first line as 0.
line: u32,
28+
// Column within the line. `LineIndex::to_line_col` fills this with the byte
// distance from the line start.
// NOTE(review): other producers may use encoding-specific units — confirm.
column: u32,
29+
}
1030

1131
impl LineCol {
32+
#[must_use]
33+
pub fn new(line: u32, column: u32) -> Self {
34+
Self { line, column }
35+
}
36+
1237
#[must_use]
1338
pub fn line(&self) -> u32 {
14-
self.0 .0
39+
self.line
1540
}
1641

1742
#[must_use]
1843
pub fn column(&self) -> u32 {
19-
self.0 .1
44+
self.column
2045
}
2146
}
2247

2348
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
2449
pub struct Span {
25-
pub start: u32,
26-
pub length: u32,
50+
start: u32,
51+
length: u32,
2752
}
2853

2954
impl Span {
@@ -32,6 +57,61 @@ impl Span {
3257
Self { start, length }
3358
}
3459

60+
#[must_use]
61+
pub fn from_parts(start: usize, length: usize) -> Self {
62+
let start_u32 = u32::try_from(start).unwrap_or(u32::MAX);
63+
let length_u32 = u32::try_from(length).unwrap_or(u32::MAX.saturating_sub(start_u32));
64+
Span::new(start_u32, length_u32)
65+
}
66+
67+
#[must_use]
68+
pub fn with_length_usize(self, length: usize) -> Self {
69+
Self::from_parts(self.start_usize(), length)
70+
}
71+
72+
/// Construct a span from integer bounds expressed as byte offsets.
73+
#[must_use]
74+
pub fn from_bounds(start: usize, end: usize) -> Self {
75+
Self::from_parts(start, end.saturating_sub(start))
76+
}
77+
78+
#[must_use]
79+
pub fn expand(self, opening: u32, closing: u32) -> Self {
80+
let start_expand = self.start.saturating_sub(opening);
81+
let length_expand = opening + self.length + closing;
82+
Self::new(start_expand, length_expand)
83+
}
84+
85+
#[must_use]
86+
pub fn as_tuple(self) -> (u32, u32) {
87+
(self.start, self.length)
88+
}
89+
90+
#[must_use]
91+
pub fn start(self) -> u32 {
92+
self.start
93+
}
94+
95+
#[must_use]
96+
pub fn start_usize(self) -> usize {
97+
self.start as usize
98+
}
99+
100+
#[must_use]
101+
pub fn end(self) -> u32 {
102+
self.start + self.length
103+
}
104+
105+
#[must_use]
106+
pub fn length(self) -> u32 {
107+
self.length
108+
}
109+
110+
#[must_use]
111+
pub fn length_usize(self) -> usize {
112+
self.length as usize
113+
}
114+
35115
#[must_use]
36116
pub fn start_offset(&self) -> ByteOffset {
37117
ByteOffset(self.start)
@@ -91,7 +171,7 @@ impl LineIndex {
91171
#[must_use]
92172
pub fn to_line_col(&self, offset: ByteOffset) -> LineCol {
93173
if self.0.is_empty() {
94-
return LineCol((0, 0));
174+
return LineCol::new(0, 0);
95175
}
96176

97177
let line = match self.0.binary_search(&offset.0) {
@@ -103,7 +183,7 @@ impl LineIndex {
103183
let line_start = self.0[line];
104184
let column = offset.0.saturating_sub(line_start);
105185

106-
LineCol((u32::try_from(line).unwrap_or_default(), column))
186+
LineCol::new(u32::try_from(line).unwrap_or_default(), column)
107187
}
108188

109189
#[must_use]
@@ -160,8 +240,8 @@ mod tests {
160240
let index = LineIndex::from_text(text);
161241

162242
// "hello" is 5 bytes, then \r\n, so "world" starts at byte 7
163-
assert_eq!(index.to_line_col(ByteOffset(0)), LineCol((0, 0)));
164-
assert_eq!(index.to_line_col(ByteOffset(7)), LineCol((1, 0)));
165-
assert_eq!(index.to_line_col(ByteOffset(8)), LineCol((1, 1)));
243+
assert_eq!(index.to_line_col(ByteOffset(0)), LineCol::new(0, 0));
244+
assert_eq!(index.to_line_col(ByteOffset(7)), LineCol::new(1, 0));
245+
assert_eq!(index.to_line_col(ByteOffset(8)), LineCol::new(1, 1));
166246
}
167247
}

crates/djls-source/src/protocol.rs

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -60,10 +60,10 @@ impl PositionEncoding {
6060
/// // UTF-16: "Hello " (6) + "🌍" (2 UTF-16 units) = position 8
6161
/// let offset = PositionEncoding::Utf16.line_col_to_offset(
6262
/// &index,
63-
/// LineCol((0, 8)),
63+
/// LineCol::new(0, 8),
6464
/// text
6565
/// );
66-
/// assert_eq!(offset, Some(ByteOffset(10))); // "Hello 🌍" is 10 bytes
66+
/// assert_eq!(offset, Some(ByteOffset::new(10))); // "Hello 🌍" is 10 bytes
6767
/// ```
6868
#[must_use]
6969
pub fn line_col_to_offset(
@@ -78,11 +78,11 @@ impl PositionEncoding {
7878
// Handle line bounds - if line > line_count, return document length
7979
let line_start_utf8 = match index.lines().get(line as usize) {
8080
Some(start) => *start,
81-
None => return Some(ByteOffset(u32::try_from(text.len()).unwrap_or(u32::MAX))),
81+
None => return Some(ByteOffset::from_usize(text.len())),
8282
};
8383

8484
if character == 0 {
85-
return Some(ByteOffset(line_start_utf8));
85+
return Some(ByteOffset::new(line_start_utf8));
8686
}
8787

8888
let next_line_start = index
@@ -96,14 +96,14 @@ impl PositionEncoding {
9696
// Fast path optimization for ASCII text, all encodings are equivalent to byte offsets
9797
if line_text.is_ascii() {
9898
let char_offset = character.min(u32::try_from(line_text.len()).unwrap_or(u32::MAX));
99-
return Some(ByteOffset(line_start_utf8 + char_offset));
99+
return Some(ByteOffset::new(line_start_utf8 + char_offset));
100100
}
101101

102102
match self {
103103
PositionEncoding::Utf8 => {
104104
// UTF-8: character positions are already byte offsets
105105
let char_offset = character.min(u32::try_from(line_text.len()).unwrap_or(u32::MAX));
106-
Some(ByteOffset(line_start_utf8 + char_offset))
106+
Some(ByteOffset::new(line_start_utf8 + char_offset))
107107
}
108108
PositionEncoding::Utf16 => {
109109
// UTF-16: count UTF-16 code units
@@ -119,7 +119,7 @@ impl PositionEncoding {
119119
}
120120

121121
// If character position exceeds line length, clamp to line end
122-
Some(ByteOffset(line_start_utf8 + utf8_pos))
122+
Some(ByteOffset::new(line_start_utf8 + utf8_pos))
123123
}
124124
PositionEncoding::Utf32 => {
125125
// UTF-32: count Unicode code points (characters)
@@ -133,7 +133,7 @@ impl PositionEncoding {
133133
}
134134

135135
// If character position exceeds line length, clamp to line end
136-
Some(ByteOffset(line_start_utf8 + utf8_pos))
136+
Some(ByteOffset::new(line_start_utf8 + utf8_pos))
137137
}
138138
}
139139
}
@@ -158,15 +158,15 @@ mod tests {
158158
// "Hello " = 6 UTF-16 units, "🌍" = 2 UTF-16 units
159159
// So position (0, 8) in UTF-16 should be after the emoji
160160
let offset = PositionEncoding::Utf16
161-
.line_col_to_offset(&index, LineCol((0, 8)), text)
161+
.line_col_to_offset(&index, LineCol::new(0, 8), text)
162162
.expect("Should get offset");
163-
assert_eq!(offset, ByteOffset(10)); // "Hello 🌍" is 10 bytes
163+
assert_eq!(offset, ByteOffset::new(10)); // "Hello 🌍" is 10 bytes
164164

165165
// In UTF-8, character 10 would be at the 'r' in 'world'
166166
let offset_utf8 = PositionEncoding::Utf8
167-
.line_col_to_offset(&index, LineCol((0, 10)), text)
167+
.line_col_to_offset(&index, LineCol::new(0, 10), text)
168168
.expect("Should get offset");
169-
assert_eq!(offset_utf8, ByteOffset(10));
169+
assert_eq!(offset_utf8, ByteOffset::new(10));
170170
}
171171

172172
#[test]
@@ -176,17 +176,17 @@ mod tests {
176176

177177
// For ASCII text, all encodings should give the same result
178178
let offset_utf8 = PositionEncoding::Utf8
179-
.line_col_to_offset(&index, LineCol((0, 5)), text)
179+
.line_col_to_offset(&index, LineCol::new(0, 5), text)
180180
.expect("Should get offset");
181181
let offset_utf16 = PositionEncoding::Utf16
182-
.line_col_to_offset(&index, LineCol((0, 5)), text)
182+
.line_col_to_offset(&index, LineCol::new(0, 5), text)
183183
.expect("Should get offset");
184184
let offset_utf32 = PositionEncoding::Utf32
185-
.line_col_to_offset(&index, LineCol((0, 5)), text)
185+
.line_col_to_offset(&index, LineCol::new(0, 5), text)
186186
.expect("Should get offset");
187187

188-
assert_eq!(offset_utf8, ByteOffset(5));
189-
assert_eq!(offset_utf16, ByteOffset(5));
190-
assert_eq!(offset_utf32, ByteOffset(5));
188+
assert_eq!(offset_utf8, ByteOffset::new(5));
189+
assert_eq!(offset_utf16, ByteOffset::new(5));
190+
assert_eq!(offset_utf32, ByteOffset::new(5));
191191
}
192192
}

0 commit comments

Comments
 (0)