Skip to content

Commit 8ef87cd

Browse files
Copilot and bashandbone authored
fix(ast-engine): correct TAB indentation detection and re-indentation behavior (#101)
* Initial plan

* fix(ast-engine): address review comments on TAB indentation support

  - template.rs:119: use *indent/*is_tab (Copy types) instead of .to_owned()
  - indent.rs: fix get_indent_at_offset_with_tab to only set is_tab=true for pure-tab indentation; mixed indentation falls back to spaces
  - indent.rs:331: use get_indent_at_offset_with_tab in test_deindent for accurate is_tab detection instead of source.contains('\t')
  - indent.rs:104-106: update doc comments to reflect tab/mixed support

  Co-authored-by: bashandbone <89049923+bashandbone@users.noreply.github.com>

* fix(ast-engine): use byte indices in test_deindent helper

  Replace .chars().count() with str::trim_start/trim_end length arithmetic so start/end are byte offsets throughout, making the helper correct for non-ASCII / multi-byte UTF-8 input.

  Co-authored-by: bashandbone <89049923+bashandbone@users.noreply.github.com>

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: bashandbone <89049923+bashandbone@users.noreply.github.com>
1 parent d2c0405 commit 8ef87cd

File tree

2 files changed

+29
-24
lines changed

2 files changed

+29
-24
lines changed

crates/ast-engine/src/replacer/indent.rs

Lines changed: 28 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -84,7 +84,8 @@
8484
//!
8585
//! ## Limitations
8686
//!
87-
//! - Only supports space-based indentation (tabs not fully supported)
87+
//! - Handles both space-based and tab-based indentation; mixed indentation
88+
//! (spaces and tabs on the same line) falls back to space-based re-indentation
8889
//! - Assumes well-formed input indentation
8990
//! - Performance overhead for large code blocks
9091
//! - Complex algorithm with edge cases
@@ -120,13 +121,13 @@ pub enum DeindentedExtract<'a, C: Content> {
120121

121122
/// Multi-line content with original indentation level recorded.
122123
///
123-
/// Contains the content bytes and the number of spaces that were used
124-
/// for indentation in the original context. The first line's indentation
125-
/// is not included in the content.
124+
/// Contains the content bytes and the number of whitespace characters
125+
/// (spaces or tabs) used for indentation in the original context. The first
126+
/// line's indentation is not included in the content.
126127
///
127128
/// # Fields
128129
/// - Content bytes with relative indentation preserved
129-
/// - Original indentation level (number of spaces)
130+
/// - Original indentation level (number of whitespace characters)
130131
MultiLine(&'a [C::Underlying], usize),
131132
}
132133

@@ -251,32 +252,40 @@ pub fn get_indent_at_offset<C: Content>(src: &[C::Underlying]) -> usize {
251252
get_indent_at_offset_with_tab::<C>(src).0
252253
}
253254

254-
/// returns (indent, `is_tab`)
255+
/// Returns `(indent_count, is_tab)` for the current line's leading whitespace.
256+
///
257+
/// `is_tab` is `true` only when the entire indentation prefix consists of tab
258+
/// characters. For mixed indentation (e.g. `" \t"`) `is_tab` is `false` so that
259+
/// re-indentation falls back to space-based expansion rather than silently
260+
/// replacing the prefix with all tabs.
255261
pub fn get_indent_at_offset_with_tab<C: Content>(src: &[C::Underlying]) -> (usize, bool) {
256262
let lookahead = src.len().max(MAX_LOOK_AHEAD) - MAX_LOOK_AHEAD;
257263

258264
let mut indent = 0;
259-
let mut is_tab = false;
265+
let mut has_tab = false;
266+
let mut has_space = false;
260267
let new_line = get_new_line::<C>();
261268
let space = get_space::<C>();
262269
let tab = get_tab::<C>();
263270
for c in src[lookahead..].iter().rev() {
264271
if *c == new_line {
265-
return (indent, is_tab);
272+
return (indent, has_tab && !has_space);
266273
}
267274
if *c == space {
268275
indent += 1;
276+
has_space = true;
269277
} else if *c == tab {
270278
indent += 1;
271-
is_tab = true;
279+
has_tab = true;
272280
} else {
273281
indent = 0;
274-
is_tab = false;
282+
has_tab = false;
283+
has_space = false;
275284
}
276285
}
277286
// lookahead == 0 means we have indentation at first line.
278287
if lookahead == 0 && indent != 0 {
279-
(indent, is_tab)
288+
(indent, has_tab && !has_space)
280289
} else {
281290
(0, false)
282291
}
@@ -316,19 +325,15 @@ mod test {
316325
fn test_deindent(source: &str, expected: &str, offset: usize) {
317326
let source = source.to_string();
318327
let expected = expected.trim();
319-
let start = source[offset..]
320-
.chars()
321-
.take_while(|n| n.is_whitespace())
322-
.count()
323-
+ offset;
324-
let trailing_white = source
325-
.chars()
326-
.rev()
327-
.take_while(|n| n.is_whitespace())
328-
.count();
329-
let end = source.chars().count() - trailing_white;
328+
// Derive byte indices rather than character counts so that the slice
329+
// operations (`extract_with_deindent`, `get_indent_at_offset_with_tab`)
330+
// work correctly for non-ASCII / multi-byte UTF-8 input as well.
331+
let leading_ws_bytes = source[offset..].len() - source[offset..].trim_start().len();
332+
let start = offset + leading_ws_bytes;
333+
let end = source.trim_end().len();
330334
let extracted = extract_with_deindent(&source, start..end);
331-
let result_bytes = indent_lines::<String>(0, &extracted, source.contains('\t'));
335+
let (_, is_tab) = get_indent_at_offset_with_tab::<String>(&source.as_bytes()[..start]);
336+
let result_bytes = indent_lines::<String>(0, &extracted, is_tab);
332337
let actual = std::str::from_utf8(&result_bytes).unwrap();
333338
assert_eq!(actual, expected);
334339
}

crates/ast-engine/src/replacer/template.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -116,7 +116,7 @@ fn replace_fixer<D: Doc>(fixer: &TemplateFix, env: &MetaVarEnv<'_, D>) -> Underl
116116
ret.extend_from_slice(&D::Source::decode_str(frag));
117117
}
118118
for ((var, indent, is_tab), frag) in vars.zip(frags) {
119-
if let Some(bytes) = maybe_get_var(env, var, indent.to_owned(), is_tab.to_owned()) {
119+
if let Some(bytes) = maybe_get_var(env, var, *indent, *is_tab) {
120120
ret.extend_from_slice(&bytes);
121121
}
122122
ret.extend_from_slice(&D::Source::decode_str(frag));

0 commit comments

Comments
 (0)