-
Notifications
You must be signed in to change notification settings - Fork 0
feat(ast-engine): support TAB indentation parsing #100
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
445aba9
115bae7
2a726a1
c450bf5
d2c0405
8ef87cd
88bffba
ae58cc0
8d96da4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
|
|
@@ -101,6 +101,9 @@ fn get_new_line<C: Content>() -> C::Underlying { | |||||||
| fn get_space<C: Content>() -> C::Underlying { | ||||||||
| C::decode_str(" ")[0].clone() | ||||||||
| } | ||||||||
| fn get_tab<C: Content>() -> C::Underlying { | ||||||||
| C::decode_str("\t")[0].clone() | ||||||||
| } | ||||||||
|
|
||||||||
| const MAX_LOOK_AHEAD: usize = 512; | ||||||||
|
|
||||||||
|
|
@@ -183,21 +186,16 @@ pub fn formatted_slice<'a, C: Content>( | |||||||
| if !slice.contains(&get_new_line::<C>()) { | ||||||||
| return Cow::Borrowed(slice); | ||||||||
| } | ||||||||
| let (indent, is_tab) = get_indent_at_offset_with_tab::<C>(content.get_range(0..start)); | ||||||||
| Cow::Owned( | ||||||||
| indent_lines::<C>( | ||||||||
| 0, | ||||||||
| &DeindentedExtract::MultiLine( | ||||||||
| slice, | ||||||||
| get_indent_at_offset::<C>(content.get_range(0..start)), | ||||||||
| ), | ||||||||
| ) | ||||||||
| .into_owned(), | ||||||||
| indent_lines::<C>(0, &DeindentedExtract::MultiLine(slice, indent), is_tab).into_owned(), | ||||||||
| ) | ||||||||
| } | ||||||||
|
|
||||||||
| pub fn indent_lines<'a, C: Content>( | ||||||||
| indent: usize, | ||||||||
| extract: &'a DeindentedExtract<'a, C>, | ||||||||
| is_tab: bool, | ||||||||
| ) -> Cow<'a, [C::Underlying]> { | ||||||||
| use DeindentedExtract::{MultiLine, SingleLine}; | ||||||||
| let (lines, original_indent) = match extract { | ||||||||
|
|
@@ -213,18 +211,27 @@ pub fn indent_lines<'a, C: Content>( | |||||||
| Ordering::Less => Cow::Owned(indent_lines_impl::<C, _>( | ||||||||
| indent - original_indent, | ||||||||
| lines.split(|b| *b == get_new_line::<C>()), | ||||||||
| is_tab, | ||||||||
| )), | ||||||||
| } | ||||||||
| } | ||||||||
|
|
||||||||
| fn indent_lines_impl<'a, C, Lines>(indent: usize, mut lines: Lines) -> Vec<C::Underlying> | ||||||||
| fn indent_lines_impl<'a, C, Lines>( | ||||||||
| indent: usize, | ||||||||
| mut lines: Lines, | ||||||||
| is_tab: bool, | ||||||||
| ) -> Vec<C::Underlying> | ||||||||
| where | ||||||||
| C: Content + 'a, | ||||||||
| Lines: Iterator<Item = &'a [C::Underlying]>, | ||||||||
| { | ||||||||
| let mut ret = vec![]; | ||||||||
| let space = get_space::<C>(); | ||||||||
| let leading: Vec<_> = std::iter::repeat_n(space, indent).collect(); | ||||||||
| let indent_char = if is_tab { | ||||||||
| get_tab::<C>() | ||||||||
| } else { | ||||||||
| get_space::<C>() | ||||||||
| }; | ||||||||
| let leading: Vec<_> = std::iter::repeat_n(indent_char, indent).collect(); | ||||||||
| // first line wasn't indented, so we don't add leading spaces | ||||||||
| if let Some(line) = lines.next() { | ||||||||
| ret.extend(line.iter().cloned()); | ||||||||
|
|
@@ -241,40 +248,62 @@ where | |||||||
| /// returns 0 if no indent is found before the offset | ||||||||
| /// either truly no indent exists, or the offset is in a long line | ||||||||
| pub fn get_indent_at_offset<C: Content>(src: &[C::Underlying]) -> usize { | ||||||||
| get_indent_at_offset_with_tab::<C>(src).0 | ||||||||
| } | ||||||||
|
|
||||||||
| /// returns (indent, is_tab) | ||||||||
| pub fn get_indent_at_offset_with_tab<C: Content>(src: &[C::Underlying]) -> (usize, bool) { | ||||||||
| let lookahead = src.len().max(MAX_LOOK_AHEAD) - MAX_LOOK_AHEAD; | ||||||||
|
|
||||||||
| let mut indent = 0; | ||||||||
| let mut is_tab = false; | ||||||||
| let new_line = get_new_line::<C>(); | ||||||||
| let space = get_space::<C>(); | ||||||||
| // TODO: support TAB. only whitespace is supported now | ||||||||
| let tab = get_tab::<C>(); | ||||||||
| for c in src[lookahead..].iter().rev() { | ||||||||
| if *c == new_line { | ||||||||
sourcery-ai[bot] marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||
| return indent; | ||||||||
| return (indent, is_tab); | ||||||||
| } | ||||||||
| if *c == space { | ||||||||
| indent += 1; | ||||||||
| } else if *c == tab { | ||||||||
| indent += 1; | ||||||||
| is_tab = true; | ||||||||
|
Comment on lines
267
to
+271
|
||||||||
| } else { | ||||||||
| indent = 0; | ||||||||
| is_tab = false; | ||||||||
| } | ||||||||
| } | ||||||||
| // lookahead == 0 means we have indentation at first line. | ||||||||
| if lookahead == 0 && indent != 0 { | ||||||||
| indent | ||||||||
| (indent, is_tab) | ||||||||
| } else { | ||||||||
| 0 | ||||||||
| (0, false) | ||||||||
| } | ||||||||
| } | ||||||||
|
|
||||||||
| // NOTE: we assume input is well indented. | ||||||||
| // following lines should have fewer indentations than initial line | ||||||||
| fn remove_indent<C: Content>(indent: usize, src: &[C::Underlying]) -> Vec<C::Underlying> { | ||||||||
| let indentation: Vec<_> = std::iter::repeat_n(get_space::<C>(), indent).collect(); | ||||||||
| let new_line = get_new_line::<C>(); | ||||||||
| let space = get_space::<C>(); | ||||||||
| let tab = get_tab::<C>(); | ||||||||
| let lines: Vec<_> = src | ||||||||
| .split(|b| *b == new_line) | ||||||||
| .map(|line| match line.strip_prefix(&*indentation) { | ||||||||
| Some(stripped) => stripped, | ||||||||
| None => line, | ||||||||
| .map(|line| { | ||||||||
| let mut stripped = line; | ||||||||
| let mut count = 0; | ||||||||
| while count < indent { | ||||||||
| if let Some(rest) = stripped.strip_prefix(&[space.clone()]) { | ||||||||
| stripped = rest; | ||||||||
| } else if let Some(rest) = stripped.strip_prefix(&[tab.clone()]) { | ||||||||
| stripped = rest; | ||||||||
| } else { | ||||||||
| break; | ||||||||
| } | ||||||||
| count += 1; | ||||||||
| } | ||||||||
| stripped | ||||||||
| }) | ||||||||
| .collect(); | ||||||||
| lines.join(&new_line).clone() | ||||||||
|
|
@@ -299,7 +328,7 @@ mod test { | |||||||
| .count(); | ||||||||
| let end = source.chars().count() - trailing_white; | ||||||||
| let extracted = extract_with_deindent(&source, start..end); | ||||||||
| let result_bytes = indent_lines::<String>(0, &extracted); | ||||||||
| let result_bytes = indent_lines::<String>(0, &extracted, source.contains('\t')); | ||||||||
|
||||||||
| let result_bytes = indent_lines::<String>(0, &extracted, source.contains('\t')); | |
| let (_, is_tab) = get_indent_at_offset_with_tab::<String>(&source, start); | |
| let result_bytes = indent_lines::<String>(0, &extracted, is_tab); |
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -4,7 +4,7 @@ | |||||
| // | ||||||
| // SPDX-License-Identifier: AGPL-3.0-or-later AND MIT | ||||||
|
|
||||||
| use super::indent::{DeindentedExtract, extract_with_deindent, get_indent_at_offset, indent_lines}; | ||||||
| use super::indent::{DeindentedExtract, extract_with_deindent, indent_lines}; | ||||||
| use super::{MetaVarExtract, Replacer, split_first_meta_var}; | ||||||
| use crate::NodeMatch; | ||||||
| use crate::language::Language; | ||||||
|
|
@@ -52,10 +52,10 @@ impl TemplateFix { | |||||
| impl<D: Doc> Replacer<D> for TemplateFix { | ||||||
| fn generate_replacement(&self, nm: &NodeMatch<'_, D>) -> Underlying<D> { | ||||||
| let leading = nm.get_doc().get_source().get_range(0..nm.range().start); | ||||||
| let indent = get_indent_at_offset::<D::Source>(leading); | ||||||
| let (indent, is_tab) = super::indent::get_indent_at_offset_with_tab::<D::Source>(leading); | ||||||
| let bytes = replace_fixer(self, nm.get_env()); | ||||||
| let replaced = DeindentedExtract::MultiLine(&bytes, 0); | ||||||
| indent_lines::<D::Source>(indent, &replaced).to_vec() | ||||||
| indent_lines::<D::Source>(indent, &replaced, is_tab).to_vec() | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
|
|
@@ -64,7 +64,7 @@ type Indent = usize; | |||||
| #[derive(Debug, Clone)] | ||||||
| pub struct Template { | ||||||
| fragments: Vec<String>, | ||||||
| vars: Vec<(MetaVarExtract, Indent)>, | ||||||
| vars: Vec<(MetaVarExtract, Indent, bool)>, // the third element is is_tab | ||||||
| } | ||||||
|
|
||||||
| fn create_template( | ||||||
|
|
@@ -82,8 +82,10 @@ fn create_template( | |||||
| { | ||||||
| fragments.push(tmpl[len..len + offset + i].to_string()); | ||||||
| // NB we have to count ident of the full string | ||||||
| let indent = get_indent_at_offset::<String>(&tmpl.as_bytes()[..len + offset + i]); | ||||||
| vars.push((meta_var, indent)); | ||||||
| let (indent, is_tab) = super::indent::get_indent_at_offset_with_tab::<String>( | ||||||
| &tmpl.as_bytes()[..len + offset + i], | ||||||
| ); | ||||||
| vars.push((meta_var, indent, is_tab)); | ||||||
| len += skipped + offset + i; | ||||||
| offset = 0; | ||||||
| continue; | ||||||
|
|
@@ -113,8 +115,8 @@ fn replace_fixer<D: Doc>(fixer: &TemplateFix, env: &MetaVarEnv<'_, D>) -> Underl | |||||
| if let Some(frag) = frags.next() { | ||||||
| ret.extend_from_slice(&D::Source::decode_str(frag)); | ||||||
| } | ||||||
| for ((var, indent), frag) in vars.zip(frags) { | ||||||
| if let Some(bytes) = maybe_get_var(env, var, indent.to_owned()) { | ||||||
| for ((var, indent, is_tab), frag) in vars.zip(frags) { | ||||||
| if let Some(bytes) = maybe_get_var(env, var, indent.to_owned(), is_tab.to_owned()) { | ||||||
|
||||||
| if let Some(bytes) = maybe_get_var(env, var, indent.to_owned(), is_tab.to_owned()) { | |
| if let Some(bytes) = maybe_get_var(env, var, *indent, *is_tab) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
After adding `get_tab` and tab-aware indentation handling, several doc comments in this module still describe indentation strictly in terms of “spaces” / “space-based indentation” (e.g., the `DeindentedExtract::MultiLine` docs and the module-level limitations). Updating those docs would keep the documentation consistent with the new behavior.