diff --git a/crates/djls-ide/src/diagnostics.rs b/crates/djls-ide/src/diagnostics.rs index a47c2ddb..a7b31509 100644 --- a/crates/djls-ide/src/diagnostics.rs +++ b/crates/djls-ide/src/diagnostics.rs @@ -1,7 +1,7 @@ use djls_semantic::ValidationError; use djls_source::File; +use djls_source::LineIndex; use djls_source::Span; -use djls_templates::LineOffsets; use djls_templates::TemplateError; use djls_templates::TemplateErrorAccumulator; use tower_lsp_server::lsp_types; @@ -56,21 +56,17 @@ impl DiagnosticError for ValidationError { } /// Convert a Span to an LSP Range using line offsets. -fn span_to_lsp_range(span: Span, line_offsets: &LineOffsets) -> lsp_types::Range { - let start_pos = span.start as usize; - let end_pos = (span.start + span.length) as usize; - - let (start_line, start_char) = line_offsets.position_to_line_col(start_pos); - let (end_line, end_char) = line_offsets.position_to_line_col(end_pos); +fn span_to_lsp_range(span: Span, line_index: &LineIndex) -> lsp_types::Range { + let (start_pos, end_pos) = span.to_line_col(line_index); lsp_types::Range { start: lsp_types::Position { - line: u32::try_from(start_line - 1).unwrap_or(u32::MAX), // LSP is 0-based, LineOffsets is 1-based - character: u32::try_from(start_char).unwrap_or(u32::MAX), + line: start_pos.line(), + character: start_pos.column(), }, end: lsp_types::Position { - line: u32::try_from(end_line - 1).unwrap_or(u32::MAX), - character: u32::try_from(end_char).unwrap_or(u32::MAX), + line: end_pos.line(), + character: end_pos.column(), }, } } @@ -78,13 +74,13 @@ fn span_to_lsp_range(span: Span, line_offsets: &LineOffsets) -> lsp_types::Range /// Convert any error implementing `DiagnosticError` to an LSP diagnostic. fn error_to_diagnostic( error: &impl DiagnosticError, - line_offsets: &LineOffsets, + line_index: &LineIndex, ) -> lsp_types::Diagnostic { let range = error .span() .map(|(start, length)| { let span = Span::new(start, length); - span_to_lsp_range(span, line_offsets) + span_to_lsp_range(span, line_index) }) .unwrap_or_default(); @@ -134,13 +130,10 @@ pub fn collect_diagnostics( let template_errors = djls_templates::parse_template::accumulated::(db, file); - let line_offsets = nodelist - .as_ref() - .map(|nl| nl.line_offsets(db).clone()) - .unwrap_or_default(); + let line_index = file.line_index(db); for error_acc in template_errors { - diagnostics.push(error_to_diagnostic(&error_acc.0, &line_offsets)); + diagnostics.push(error_to_diagnostic(&error_acc.0, line_index)); } if let Some(nodelist) = nodelist { @@ -149,7 +142,7 @@ pub fn collect_diagnostics( >(db, nodelist); for error_acc in validation_errors { - diagnostics.push(error_to_diagnostic(&error_acc.0, &line_offsets)); + diagnostics.push(error_to_diagnostic(&error_acc.0, line_index)); } } diff --git a/crates/djls-source/src/file.rs b/crates/djls-source/src/file.rs index 0dca4f6e..c6a42869 100644 --- a/crates/djls-source/src/file.rs +++ b/crates/djls-source/src/file.rs @@ -5,6 +5,7 @@ use camino::Utf8Path; use camino::Utf8PathBuf; use crate::db::Db; +use crate::position::LineIndex; #[salsa::input] pub struct File { @@ -27,14 +28,7 @@ impl File { #[salsa::tracked(returns(ref))] pub fn line_index(self, db: &dyn Db) -> LineIndex { let text = self.source(db); - let mut starts = Vec::with_capacity(256); - starts.push(0); - for (i, b) in text.0.source.bytes().enumerate() { - if b == b'\n' { - starts.push(u32::try_from(i).unwrap_or_default() + 1); - } - } - LineIndex(starts) + LineIndex::from_text(text.0.source.as_str()) } } @@ -123,6 +117,3 @@ impl FileKind { } } } - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct LineIndex(Vec); diff --git a/crates/djls-source/src/lib.rs b/crates/djls-source/src/lib.rs index ed035339..b2227aef 100644 --- a/crates/djls-source/src/lib.rs +++ b/crates/djls-source/src/lib.rs @@ -1,8 +1,11 @@ mod db; mod file; -mod span; +mod position; pub use db::Db; pub use file::File; pub use file::FileKind; -pub use span::Span; +pub use position::ByteOffset; +pub use position::LineCol; +pub use position::LineIndex; +pub use position::Span; diff --git a/crates/djls-source/src/position.rs b/crates/djls-source/src/position.rs new file mode 100644 index 00000000..385d6ef7 --- /dev/null +++ b/crates/djls-source/src/position.rs @@ -0,0 +1,167 @@ +use serde::Serialize; + +/// A byte offset within a text document. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)] +pub struct ByteOffset(pub u32); + +/// A line and column position within a text document. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct LineCol(pub (u32, u32)); + +impl LineCol { + #[must_use] + pub fn line(&self) -> u32 { + self.0 .0 + } + + #[must_use] + pub fn column(&self) -> u32 { + self.0 .1 + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)] +pub struct Span { + pub start: u32, + pub length: u32, +} + +impl Span { + #[must_use] + pub fn new(start: u32, length: u32) -> Self { + Self { start, length } + } + + #[must_use] + pub fn start_offset(&self) -> ByteOffset { + ByteOffset(self.start) + } + + #[must_use] + pub fn end_offset(&self) -> ByteOffset { + ByteOffset(self.start.saturating_add(self.length)) + } + + /// Convert this span to start and end line/column positions using the given line index. + #[must_use] + pub fn to_line_col(&self, line_index: &LineIndex) -> (LineCol, LineCol) { + let start = line_index.to_line_col(self.start_offset()); + let end = line_index.to_line_col(self.end_offset()); + (start, end) + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct LineIndex(Vec); + +impl LineIndex { + #[must_use] + pub fn from_text(text: &str) -> Self { + let mut starts = Vec::with_capacity(256); + starts.push(0); + + let bytes = text.as_bytes(); + let mut i = 0; + while i < bytes.len() { + match bytes[i] { + b'\n' => { + // LF - Unix style line ending + starts.push(u32::try_from(i + 1).unwrap_or_default()); + i += 1; + } + b'\r' => { + // CR - check if followed by LF for Windows style + if i + 1 < bytes.len() && bytes[i + 1] == b'\n' { + // CRLF - Windows style line ending + starts.push(u32::try_from(i + 2).unwrap_or_default()); + i += 2; + } else { + // Just CR - old Mac style line ending + starts.push(u32::try_from(i + 1).unwrap_or_default()); + i += 1; + } + } + _ => i += 1, + } + } + + LineIndex(starts) + } + + #[must_use] + pub fn to_line_col(&self, offset: ByteOffset) -> LineCol { + if self.0.is_empty() { + return LineCol((0, 0)); + } + + let line = match self.0.binary_search(&offset.0) { + Ok(exact) => exact, + Err(0) => 0, + Err(next) => next - 1, + }; + + let line_start = self.0[line]; + let column = offset.0.saturating_sub(line_start); + + LineCol((u32::try_from(line).unwrap_or_default(), column)) + } + + #[must_use] + pub fn line_start(&self, line: u32) -> Option { + self.0.get(line as usize).copied() + } + + #[must_use] + pub fn lines(&self) -> &[u32] { + &self.0 + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_line_index_unix_endings() { + let text = "line1\nline2\nline3"; + let index = LineIndex::from_text(text); + assert_eq!(index.lines(), &[0, 6, 12]); + } + + #[test] + fn test_line_index_windows_endings() { + let text = "line1\r\nline2\r\nline3"; + let index = LineIndex::from_text(text); + // After "line1\r\n" (7 bytes), next line starts at byte 7 + // After "line2\r\n" (7 bytes), next line starts at byte 14 + assert_eq!(index.lines(), &[0, 7, 14]); + } + + #[test] + fn test_line_index_mixed_endings() { + let text = "line1\nline2\r\nline3\rline4"; + let index = LineIndex::from_text(text); + // "line1\n" -> next at 6 + // "line2\r\n" -> next at 13 + // "line3\r" -> next at 19 + assert_eq!(index.lines(), &[0, 6, 13, 19]); + } + + #[test] + fn test_line_index_empty() { + let text = ""; + let index = LineIndex::from_text(text); + assert_eq!(index.lines(), &[0]); + } + + #[test] + fn test_to_line_col_with_crlf() { + let text = "hello\r\nworld"; + let index = LineIndex::from_text(text); + + // "hello" is 5 bytes, then \r\n, so "world" starts at byte 7 + assert_eq!(index.to_line_col(ByteOffset(0)), LineCol((0, 0))); + assert_eq!(index.to_line_col(ByteOffset(7)), LineCol((1, 0))); + assert_eq!(index.to_line_col(ByteOffset(8)), LineCol((1, 1))); + } +} diff --git a/crates/djls-source/src/span.rs b/crates/djls-source/src/span.rs deleted file mode 100644 index e91fbfd7..00000000 --- a/crates/djls-source/src/span.rs +++ /dev/null @@ -1,14 +0,0 @@ -use serde::Serialize; - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)] -pub struct Span { - pub start: u32, - pub length: u32, -} - -impl Span { - #[must_use] - pub fn new(start: u32, length: u32) -> Self { - Self { start, length } - } -} diff --git a/crates/djls-templates/src/lexer.rs b/crates/djls-templates/src/lexer.rs index fadb4d0e..efc005e8 100644 --- a/crates/djls-templates/src/lexer.rs +++ b/crates/djls-templates/src/lexer.rs @@ -1,5 +1,4 @@ use crate::db::Db as TemplateDb; -use crate::nodelist::LineOffsets; use crate::tokens::Token; use crate::tokens::TokenContent; @@ -28,9 +27,8 @@ impl<'db> Lexer<'db> { } } - pub fn tokenize(&mut self) -> (Vec>, LineOffsets) { + pub fn tokenize(&mut self) -> Vec> { let mut tokens = Vec::new(); - let mut line_offsets = LineOffsets::default(); while !self.is_at_end() { self.start = self.current; @@ -52,23 +50,12 @@ impl<'db> Lexer<'db> { _ => self.lex_text(), }; - match self.peek_previous() { - '\n' => line_offsets.add_line(u32::try_from(self.current).unwrap_or(u32::MAX)), - '\r' => { - line_offsets.add_line(u32::try_from(self.current).unwrap_or(u32::MAX)); - if self.peek() == '\n' { - self.current += 1; - } - } - _ => {} - } - tokens.push(token); } tokens.push(Token::Eof); - (tokens, line_offsets) + tokens } fn lex_django_construct( @@ -149,17 +136,6 @@ impl<'db> Lexer<'db> { chars.next().unwrap_or('\0') } - fn peek_previous(&self) -> char { - if self.current == 0 { - return '\0'; - } - let mut pos = self.current - 1; - while !self.source.is_char_boundary(pos) && pos > 0 { - pos -= 1; - } - self.source[pos..].chars().next().unwrap_or('\0') - } - #[inline] fn is_at_end(&self) -> bool { self.current >= self.source.len() @@ -244,7 +220,7 @@ mod tests { let db = TestDatabase::new(); let source = r#"
"#; let mut lexer = Lexer::new(&db, source); - let (tokens, _) = lexer.tokenize(); + let tokens = lexer.tokenize(); let snapshot = TokenSnapshotVec(tokens).to_snapshot(&db); insta::assert_yaml_snapshot!(snapshot); } @@ -254,7 +230,7 @@ mod tests { let db = TestDatabase::new(); let source = "{{ user.name|default:\"Anonymous\"|title }}"; let mut lexer = Lexer::new(&db, source); - let (tokens, _) = lexer.tokenize(); + let tokens = lexer.tokenize(); let snapshot = TokenSnapshotVec(tokens).to_snapshot(&db); insta::assert_yaml_snapshot!(snapshot); } @@ -264,7 +240,7 @@ mod tests { let db = TestDatabase::new(); let source = "{% if user.is_staff %}Admin{% else %}User{% endif %}"; let mut lexer = Lexer::new(&db, source); - let (tokens, _) = lexer.tokenize(); + let tokens = lexer.tokenize(); let snapshot = TokenSnapshotVec(tokens).to_snapshot(&db); insta::assert_yaml_snapshot!(snapshot); } @@ -283,7 +259,7 @@ mod tests { /* CSS comment */ "; let mut lexer = Lexer::new(&db, source); - let (tokens, _) = lexer.tokenize(); + let tokens = lexer.tokenize(); let snapshot = TokenSnapshotVec(tokens).to_snapshot(&db); insta::assert_yaml_snapshot!(snapshot); } @@ -299,7 +275,7 @@ mod tests { console.log(x); "#; let mut lexer = Lexer::new(&db, source); - let (tokens, _) = lexer.tokenize(); + let tokens = lexer.tokenize(); let snapshot = TokenSnapshotVec(tokens).to_snapshot(&db); insta::assert_yaml_snapshot!(snapshot); } @@ -314,7 +290,7 @@ mod tests { } "#; let mut lexer = Lexer::new(&db, source); - let (tokens, _) = lexer.tokenize(); + let tokens = lexer.tokenize(); let snapshot = TokenSnapshotVec(tokens).to_snapshot(&db); insta::assert_yaml_snapshot!(snapshot); } @@ -328,7 +304,7 @@ mod tests {
text
"; let mut lexer = Lexer::new(&db, source); - let (tokens, _) = lexer.tokenize(); + let tokens = lexer.tokenize(); let snapshot = TokenSnapshotVec(tokens).to_snapshot(&db); insta::assert_yaml_snapshot!(snapshot); } @@ -367,7 +343,7 @@ mod tests { "#; let mut lexer = Lexer::new(&db, source); - let (tokens, _) = lexer.tokenize(); + let tokens = lexer.tokenize(); let snapshot = TokenSnapshotVec(tokens).to_snapshot(&db); insta::assert_yaml_snapshot!(snapshot); } @@ -377,7 +353,7 @@ mod tests { let db = TestDatabase::new(); let source = "