Skip to content
Merged
95 changes: 73 additions & 22 deletions crates/ast-engine/src/replacer/indent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,9 @@ fn get_new_line<C: Content>() -> C::Underlying {
/// Returns the underlying unit that content type `C` uses for a space.
///
/// The value is the first element of `C::decode_str(" ")`.
fn get_space<C: Content>() -> C::Underlying {
    let decoded = C::decode_str(" ");
    decoded[0].clone()
}
/// Returns the underlying unit that content type `C` uses for a tab.
///
/// The value is the first element of `C::decode_str("\t")`.
fn get_tab<C: Content>() -> C::Underlying {
    let decoded = C::decode_str("\t");
    decoded[0].clone()
}
Comment on lines +104 to +106
Copy link

Copilot AI Mar 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After adding get_tab and tab-aware indentation handling, several doc comments in this module still describe indentation strictly in terms of “spaces” / “space-based indentation” (e.g., DeindentedExtract::MultiLine docs and the module-level limitations). Updating those docs would keep the documentation consistent with the new behavior.

Copilot uses AI. Check for mistakes.

/// Upper bound on how many trailing units of the prefix are scanned when
/// `get_indent_at_offset_with_tab` walks backwards looking for the start of
/// the current line; anything earlier than this window is not inspected.
const MAX_LOOK_AHEAD: usize = 512;

Expand Down Expand Up @@ -183,21 +186,16 @@ pub fn formatted_slice<'a, C: Content>(
if !slice.contains(&get_new_line::<C>()) {
return Cow::Borrowed(slice);
}
let (indent, is_tab) = get_indent_at_offset_with_tab::<C>(content.get_range(0..start));
Cow::Owned(
indent_lines::<C>(
0,
&DeindentedExtract::MultiLine(
slice,
get_indent_at_offset::<C>(content.get_range(0..start)),
),
)
.into_owned(),
indent_lines::<C>(0, &DeindentedExtract::MultiLine(slice, indent), is_tab).into_owned(),
)
}

pub fn indent_lines<'a, C: Content>(
indent: usize,
extract: &'a DeindentedExtract<'a, C>,
is_tab: bool,
) -> Cow<'a, [C::Underlying]> {
use DeindentedExtract::{MultiLine, SingleLine};
let (lines, original_indent) = match extract {
Expand All @@ -213,18 +211,27 @@ pub fn indent_lines<'a, C: Content>(
Ordering::Less => Cow::Owned(indent_lines_impl::<C, _>(
indent - original_indent,
lines.split(|b| *b == get_new_line::<C>()),
is_tab,
)),
}
}

fn indent_lines_impl<'a, C, Lines>(indent: usize, mut lines: Lines) -> Vec<C::Underlying>
fn indent_lines_impl<'a, C, Lines>(
indent: usize,
mut lines: Lines,
is_tab: bool,
) -> Vec<C::Underlying>
where
C: Content + 'a,
Lines: Iterator<Item = &'a [C::Underlying]>,
{
let mut ret = vec![];
let space = get_space::<C>();
let leading: Vec<_> = std::iter::repeat_n(space, indent).collect();
let indent_char = if is_tab {
get_tab::<C>()
} else {
get_space::<C>()
};
let leading: Vec<_> = std::iter::repeat_n(indent_char, indent).collect();
// first line wasn't indented, so we don't add the leading indent (spaces or tabs)
if let Some(line) = lines.next() {
ret.extend(line.iter().cloned());
Expand All @@ -241,40 +248,62 @@ where
/// Returns the indentation width found right before the offset, where `src`
/// is the content preceding that offset.
///
/// Yields 0 when no indent is found before the offset: either there truly is
/// no indent, or the offset is in a long line. Only the width reported by
/// `get_indent_at_offset_with_tab` is kept; its `is_tab` flag is dropped.
pub fn get_indent_at_offset<C: Content>(src: &[C::Underlying]) -> usize {
    let (indent, _is_tab) = get_indent_at_offset_with_tab::<C>(src);
    indent
}

/// returns (indent, is_tab)
pub fn get_indent_at_offset_with_tab<C: Content>(src: &[C::Underlying]) -> (usize, bool) {
let lookahead = src.len().max(MAX_LOOK_AHEAD) - MAX_LOOK_AHEAD;

let mut indent = 0;
let mut is_tab = false;
let new_line = get_new_line::<C>();
let space = get_space::<C>();
// TODO: support TAB. only whitespace is supported now
let tab = get_tab::<C>();
for c in src[lookahead..].iter().rev() {
if *c == new_line {
return indent;
return (indent, is_tab);
}
if *c == space {
indent += 1;
} else if *c == tab {
indent += 1;
is_tab = true;
Comment on lines 267 to +271
Copy link

Copilot AI Mar 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

get_indent_at_offset_with_tab collapses indentation style into a single is_tab flag (set to true if any tab appears). For mixed indentation (e.g. " \t"), this will re-indent using only tabs and will not preserve the original prefix. Consider returning the actual indentation prefix (or an enum like Space/Tab/Mixed with the concrete prefix) so re-indentation can reproduce mixed whitespace correctly.

Copilot uses AI. Check for mistakes.
} else {
indent = 0;
is_tab = false;
}
}
// lookahead == 0 means we have indentation at first line.
if lookahead == 0 && indent != 0 {
indent
(indent, is_tab)
} else {
0
(0, false)
}
}

// NOTE: we assume input is well indented.
// following lines should have fewer indentations than initial line
fn remove_indent<C: Content>(indent: usize, src: &[C::Underlying]) -> Vec<C::Underlying> {
let indentation: Vec<_> = std::iter::repeat_n(get_space::<C>(), indent).collect();
let new_line = get_new_line::<C>();
let space = get_space::<C>();
let tab = get_tab::<C>();
let lines: Vec<_> = src
.split(|b| *b == new_line)
.map(|line| match line.strip_prefix(&*indentation) {
Some(stripped) => stripped,
None => line,
.map(|line| {
let mut stripped = line;
let mut count = 0;
while count < indent {
if let Some(rest) = stripped.strip_prefix(&[space.clone()]) {
stripped = rest;
} else if let Some(rest) = stripped.strip_prefix(&[tab.clone()]) {
stripped = rest;
} else {
break;
}
count += 1;
}
stripped
})
.collect();
lines.join(&new_line).clone()
Expand All @@ -299,7 +328,7 @@ mod test {
.count();
let end = source.chars().count() - trailing_white;
let extracted = extract_with_deindent(&source, start..end);
let result_bytes = indent_lines::<String>(0, &extracted);
let result_bytes = indent_lines::<String>(0, &extracted, source.contains('\t'));
Copy link

Copilot AI Mar 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In test_deindent, using source.contains('\t') to decide is_tab can mis-detect indentation style if the source contains a tab anywhere (e.g. in a string literal) but the indentation at start is spaces. It’d be more accurate to compute (indent, is_tab) from the prefix up to start via get_indent_at_offset_with_tab and pass that is_tab value.

Suggested change
let result_bytes = indent_lines::<String>(0, &extracted, source.contains('\t'));
let (_, is_tab) = get_indent_at_offset_with_tab::<String>(&source, start);
let result_bytes = indent_lines::<String>(0, &extracted, is_tab);

Copilot uses AI. Check for mistakes.
let actual = std::str::from_utf8(&result_bytes).unwrap();
assert_eq!(actual, expected);
}
Expand Down Expand Up @@ -391,8 +420,8 @@ pass
fn test_replace_with_indent(target: &str, start: usize, inserted: &str) -> String {
let target = target.to_string();
let replace_lines = DeindentedExtract::MultiLine(inserted.as_bytes(), 0);
let indent = get_indent_at_offset::<String>(&target.as_bytes()[..start]);
let ret = indent_lines::<String>(indent, &replace_lines);
let (indent, is_tab) = get_indent_at_offset_with_tab::<String>(&target.as_bytes()[..start]);
let ret = indent_lines::<String>(indent, &replace_lines, is_tab);
String::from_utf8(ret.to_vec()).unwrap()
}

Expand Down Expand Up @@ -445,4 +474,26 @@ pass
let actual = test_replace_with_indent(target, 6, inserted);
assert_eq!(actual, "def abc():\n pass");
}

#[test]
fn test_tab_indent() {
    // The extracted block sits two tabs deep; after de-indenting, the first
    // line has no indent and the nested line keeps a single tab.
    let tabbed_source = "\n\t\tdef test():\n\t\t\tpass";
    let deindented = "def test():\n\tpass";
    test_deindent(tabbed_source, deindented, 0);
}

#[test]
fn test_tab_replace() {
    let two_tabs = "\t\t";
    let one_liner = "def abc(): pass";
    let multi_line = "def abc():\n\tpass";

    // A single-line insertion has no following lines to re-indent.
    assert_eq!(
        test_replace_with_indent(two_tabs, 2, one_liner),
        "def abc(): pass"
    );
    // The second line gains the two tabs that precede the insertion point.
    assert_eq!(
        test_replace_with_indent(two_tabs, 2, multi_line),
        "def abc():\n\t\t\tpass"
    );

    // Offset 14 falls after the first tab of the second line, so only one
    // tab of indentation is picked up for the inserted text.
    let nested_target = "\t\tdef abc():\n\t\t\t";
    assert_eq!(
        test_replace_with_indent(nested_target, 14, multi_line),
        "def abc():\n\t\tpass"
    );
}
}
23 changes: 13 additions & 10 deletions crates/ast-engine/src/replacer/template.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
//
// SPDX-License-Identifier: AGPL-3.0-or-later AND MIT

use super::indent::{DeindentedExtract, extract_with_deindent, get_indent_at_offset, indent_lines};
use super::indent::{DeindentedExtract, extract_with_deindent, indent_lines};
use super::{MetaVarExtract, Replacer, split_first_meta_var};
use crate::NodeMatch;
use crate::language::Language;
Expand Down Expand Up @@ -52,10 +52,10 @@ impl TemplateFix {
impl<D: Doc> Replacer<D> for TemplateFix {
fn generate_replacement(&self, nm: &NodeMatch<'_, D>) -> Underlying<D> {
let leading = nm.get_doc().get_source().get_range(0..nm.range().start);
let indent = get_indent_at_offset::<D::Source>(leading);
let (indent, is_tab) = super::indent::get_indent_at_offset_with_tab::<D::Source>(leading);
let bytes = replace_fixer(self, nm.get_env());
let replaced = DeindentedExtract::MultiLine(&bytes, 0);
indent_lines::<D::Source>(indent, &replaced).to_vec()
indent_lines::<D::Source>(indent, &replaced, is_tab).to_vec()
}
}

Expand All @@ -64,7 +64,7 @@ type Indent = usize;
#[derive(Debug, Clone)]
pub struct Template {
fragments: Vec<String>,
vars: Vec<(MetaVarExtract, Indent)>,
vars: Vec<(MetaVarExtract, Indent, bool)>, // the third element is is_tab
}

fn create_template(
Expand All @@ -82,8 +82,10 @@ fn create_template(
{
fragments.push(tmpl[len..len + offset + i].to_string());
// NB we have to count indent of the full string
let indent = get_indent_at_offset::<String>(&tmpl.as_bytes()[..len + offset + i]);
vars.push((meta_var, indent));
let (indent, is_tab) = super::indent::get_indent_at_offset_with_tab::<String>(
&tmpl.as_bytes()[..len + offset + i],
);
vars.push((meta_var, indent, is_tab));
len += skipped + offset + i;
offset = 0;
continue;
Expand Down Expand Up @@ -113,8 +115,8 @@ fn replace_fixer<D: Doc>(fixer: &TemplateFix, env: &MetaVarEnv<'_, D>) -> Underl
if let Some(frag) = frags.next() {
ret.extend_from_slice(&D::Source::decode_str(frag));
}
for ((var, indent), frag) in vars.zip(frags) {
if let Some(bytes) = maybe_get_var(env, var, indent.to_owned()) {
for ((var, indent, is_tab), frag) in vars.zip(frags) {
if let Some(bytes) = maybe_get_var(env, var, indent.to_owned(), is_tab.to_owned()) {
Copy link

Copilot AI Mar 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

indent.to_owned() / is_tab.to_owned() are unnecessary here since usize and bool are Copy. Passing *indent / *is_tab (or destructuring by value earlier) would be simpler and avoids the extra trait calls.

Suggested change
if let Some(bytes) = maybe_get_var(env, var, indent.to_owned(), is_tab.to_owned()) {
if let Some(bytes) = maybe_get_var(env, var, *indent, *is_tab) {

Copilot uses AI. Check for mistakes.
ret.extend_from_slice(&bytes);
}
ret.extend_from_slice(&D::Source::decode_str(frag));
Expand All @@ -126,6 +128,7 @@ fn maybe_get_var<'e, 't, C, D>(
env: &'e MetaVarEnv<'t, D>,
var: &MetaVarExtract,
indent: usize,
is_tab: bool,
) -> Option<Cow<'e, [C::Underlying]>>
where
C: Content + 'e,
Expand All @@ -136,7 +139,7 @@ where
// transformed source does not have range, directly return bytes
let source = env.get_transformed(name)?;
let de_intended = DeindentedExtract::MultiLine(source, 0);
let bytes = indent_lines::<D::Source>(indent, &de_intended);
let bytes = indent_lines::<D::Source>(indent, &de_intended, is_tab);
return Some(Cow::Owned(bytes.into()));
}
MetaVarExtract::Single(name) => {
Expand All @@ -160,7 +163,7 @@ where
}
};
let extracted = extract_with_deindent(source, range);
let bytes = indent_lines::<D::Source>(indent, &extracted);
let bytes = indent_lines::<D::Source>(indent, &extracted, is_tab);
Some(Cow::Owned(bytes.into()))
}

Expand Down
Loading