Skip to content

Commit 5d6c747

Browse files
committed
feat: support indent
1 parent 9e0e715 commit 5d6c747

File tree

8 files changed

+282
-55
lines changed

8 files changed

+282
-55
lines changed

Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "string_wizard"
3-
version = "0.0.9"
3+
version = "0.0.10"
44
edition = "2021"
55
license = "MIT"
66
description = "manipulate string like wizards"
@@ -10,6 +10,7 @@ description = "manipulate string like wizards"
1010
[dependencies]
1111
index_vec = { version = "0.1.3" }
1212
rustc-hash = { version = "1.1.0" }
13+
regex = "1.10.2"
1314
serde = { version = "1.0", features = ["derive"], optional = true }
1415
serde_json = { version = "1.0", optional = true }
1516

src/basic_types.rs

Lines changed: 1 addition & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,4 @@
1-
use std::{borrow::Cow, fmt::Debug};
2-
3-
#[derive(Debug, Clone)]
4-
pub struct BasicCowStr<'text> {
5-
inner: Cow<'text, str>,
6-
}
7-
8-
impl<'text> BasicCowStr<'text> {
9-
pub fn new(inner: Cow<'text, str>) -> Self {
10-
assert!(
11-
u32::try_from(inner.len()).is_ok(),
12-
"We only support string up to 4GB in size, which is the maximum size of the u32."
13-
);
14-
Self { inner }
15-
}
16-
17-
pub fn len(&self) -> u32 {
18-
// We can safely do converting here because we have already asserted that
19-
// the length of the string is less than or equal `u32::MAX`
20-
self.inner.len() as u32
21-
}
22-
23-
pub fn as_str(&self) -> &str {
24-
self.inner.as_ref()
25-
}
26-
}
27-
28-
impl<'text> std::ops::Deref for BasicCowStr<'text> {
29-
type Target = Cow<'text, str>;
30-
31-
fn deref(&self) -> &Self::Target {
32-
&self.inner
33-
}
34-
}
35-
36-
impl<'text, T: Into<Cow<'text, str>>> From<T> for BasicCowStr<'text> {
37-
fn from(value: T) -> Self {
38-
Self::new(value.into())
39-
}
40-
}
1+
use std::{fmt::Debug};
412

423
// This is basically doing the same thing as `TryInto<u32>`.
434
// If we use `TryInto<u32>`, we need to put `where <T as TryInto<u32>>::Error: Debug` everywhere.

src/chunk.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,8 +107,8 @@ impl<'str> Chunk<'str> {
107107
let source_frag = self
108108
.edited_content
109109
.as_ref()
110-
.map(|s| s.as_str())
111-
.unwrap_or_else(|| self.span.text(original_source.as_str()));
110+
.map(|s| s.as_ref())
111+
.unwrap_or_else(|| self.span.text(&original_source));
112112
let outro_iter = self.outro.iter().map(|frag| frag.as_ref());
113113
intro_iter.chain(Some(source_frag)).chain(outro_iter)
114114
}

src/lib.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,16 @@ mod magic_string;
77
mod source_map;
88
mod span;
99

10-
type CowStr<'text> = BasicCowStr<'text>;
10+
type CowStr<'text> = Cow<'text, str>;
1111

1212
pub(crate) type TextSize = u32;
1313

14-
use basic_types::BasicCowStr;
14+
use std::borrow::Cow;
15+
1516

1617
pub use crate::{
1718
joiner::{Joiner, JoinerOptions},
18-
magic_string::{mutation::UpdateOptions, MagicString, MagicStringOptions},
19+
magic_string::{mutation::UpdateOptions, MagicString, MagicStringOptions, indent::IndentOptions},
1920
};
2021

2122
#[cfg(feature = "source_map")]

src/magic_string/indent.rs

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
use crate::{CowStr, MagicString};
2+
3+
/// Guesses the indentation unit used by `source`.
///
/// Every line is classified by its first character: a leading tab marks the line as
/// tab-indented, a leading space marks it as space-indented, anything else is ignored.
///
/// Returns
/// - `None` when no line starts with a tab or a space,
/// - `Some("\t")` when tab-indented lines are at least as numerous as space-indented lines,
/// - otherwise the shortest run of leading spaces found on any space-indented line.
fn guess_indent_str(source: &str) -> Option<String> {
    let mut tabbed_count = 0usize;
    let mut spaced_count = 0usize;
    // Shortest run of leading spaces seen so far; `None` until a space-indented line shows up.
    let mut min_space_count: Option<usize> = None;

    for line in source.lines() {
        if line.starts_with('\t') {
            tabbed_count += 1;
        } else if line.starts_with(' ') {
            spaced_count += 1;
            let leading_spaces = line.bytes().take_while(|byte| *byte == b' ').count();
            min_space_count = Some(
                min_space_count.map_or(leading_spaces, |shortest| shortest.min(leading_spaces)),
            );
        }
    }

    if tabbed_count == 0 && spaced_count == 0 {
        return None;
    }

    // Ties are resolved in favour of tabs.
    if tabbed_count >= spaced_count {
        return Some("\t".to_string());
    }

    // Reaching this point means `spaced_count > 0`, so a minimum has been recorded.
    min_space_count.map(|width| " ".repeat(width))
}
34+
35+
/// Options accepted by `MagicString::indent_with`.
#[derive(Debug, Default)]
36+
pub struct IndentOptions<'a> {
37+
/// The string inserted in front of each indented line.
///
/// MagicString will guess the `indent_str` from lines of the source if passed `None`.
/// Passing `Some("")` makes `indent_with` return without changing anything.
38+
pub indent_str: Option<&'a str>,
39+
}
40+
41+
impl<'text> MagicString<'text> {
42+
fn ensure_indent_str(&mut self) -> &str {
43+
if self.indent_str.is_none() {
44+
self.indent_str = guess_indent_str(&self.source);
45+
}
46+
47+
self.indent_str.as_deref().unwrap_or(&"\t")
48+
}
49+
50+
pub fn indent(&mut self) -> &mut Self {
51+
self.indent_with(IndentOptions { indent_str: None })
52+
}
53+
54+
/// Shortcut for `indent_with(IndentOptions { indent_str: Some(indent_str), ..Default::default() })`
55+
pub fn indent_str(&mut self, indent_str: &str) -> &mut Self {
56+
self.indent_with(IndentOptions {
57+
indent_str: Some(indent_str),
58+
})
59+
}
60+
61+
pub fn indent_with(&mut self, opts: IndentOptions<'_>) -> &mut Self {
62+
if opts.indent_str.map_or(false, |s| s.is_empty()) {
63+
return self;
64+
}
65+
struct IndentReplacer {
66+
should_indent_next_char: bool,
67+
indent_str: String,
68+
}
69+
70+
impl regex::Replacer for &mut &mut IndentReplacer {
71+
fn replace_append(&mut self, caps: &regex::Captures<'_>, dst: &mut String) {
72+
if self.should_indent_next_char {
73+
dst.push_str(&self.indent_str);
74+
}
75+
for cap in caps.iter() {
76+
if let Some(cap) = cap {
77+
dst.push_str(cap.as_str());
78+
}
79+
}
80+
}
81+
}
82+
83+
fn indent_frag<'text>(
84+
frag: &mut CowStr<'text>,
85+
pattern: &regex::Regex,
86+
mut indent_replacer: &mut IndentReplacer,
87+
) {
88+
match frag {
89+
std::borrow::Cow::Borrowed(str) => {
90+
let might_replaced = pattern.replace_all(str, &mut indent_replacer);
91+
*frag = might_replaced;
92+
}
93+
std::borrow::Cow::Owned(str) => {
94+
let might_replaced = pattern.replace_all(str, &mut indent_replacer);
95+
match might_replaced {
96+
std::borrow::Cow::Owned(replaced) => {
97+
*frag = replaced.into();
98+
}
99+
std::borrow::Cow::Borrowed(_) => {
100+
// Since nothing was replaced, we can just use the original string.
101+
}
102+
}
103+
}
104+
}
105+
}
106+
107+
let indent_str = opts.indent_str.unwrap_or_else(|| self.ensure_indent_str());
108+
109+
let pattern = regex::Regex::new(r"(?m)^[^\r\n]").unwrap();
110+
111+
let mut indent_replacer = IndentReplacer {
112+
should_indent_next_char: true,
113+
indent_str: indent_str.to_string(),
114+
};
115+
116+
for intro_frag in self.intro.iter_mut() {
117+
indent_frag(intro_frag, &pattern, &mut indent_replacer)
118+
}
119+
120+
let mut next_chunk_id = Some(self.first_chunk_idx);
121+
122+
while let Some(chunk_idx) = next_chunk_id {
123+
// Make sure the `next_chunk_id` is updated before we split the chunk. Otherwise, we
124+
// might process the same chunk twice.
125+
next_chunk_id = self.chunks[chunk_idx].next;
126+
if let Some(edited_content) = self.chunks[chunk_idx].edited_content.as_mut() {
127+
indent_frag(edited_content, &pattern, &mut indent_replacer);
128+
} else {
129+
let chunk = &self.chunks[chunk_idx];
130+
let mut line_starts = vec![];
131+
let mut char_index = chunk.start();
132+
for char in chunk.span.text(&self.source).chars() {
133+
debug_assert!(self.source.is_char_boundary(char_index as usize));
134+
if char == '\n' {
135+
indent_replacer.should_indent_next_char = true;
136+
} else if char != '\r' && indent_replacer.should_indent_next_char {
137+
indent_replacer.should_indent_next_char = false;
138+
debug_assert!(!line_starts.contains(&char_index));
139+
line_starts.push(char_index);
140+
}
141+
char_index += char.len_utf8() as u32;
142+
}
143+
for line_start in line_starts {
144+
self.prepend_right(line_start, indent_replacer.indent_str.clone());
145+
}
146+
}
147+
}
148+
149+
for frag in self.outro.iter_mut() {
150+
indent_frag(frag, &pattern, &mut indent_replacer)
151+
}
152+
153+
self
154+
}
155+
}

src/magic_string/mod.rs

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
pub mod indent;
12
pub mod mutation;
23
#[cfg(feature = "source_map")]
34
pub mod source_map;
@@ -23,8 +24,9 @@ pub struct MagicString<'s> {
2324
pub filename: Option<String>,
2425
intro: VecDeque<CowStr<'s>>,
2526
outro: VecDeque<CowStr<'s>>,
26-
27+
indent_str: Option<String>,
2728
source: CowStr<'s>,
29+
source_len: TextSize,
2830
chunks: ChunkVec<'s>,
2931
first_chunk_idx: ChunkIdx,
3032
last_chunk_idx: ChunkIdx,
@@ -41,26 +43,29 @@ impl<'text> MagicString<'text> {
4143

4244
pub fn with_options(source: impl Into<CowStr<'text>>, options: MagicStringOptions) -> Self {
4345
let source = source.into();
44-
let initial_chunk = Chunk::new(Span(0, source.len()));
46+
let source_len = u32::try_from(source.len()).unwrap();
47+
let initial_chunk = Chunk::new(Span(0, source_len));
4548
let mut chunks = ChunkVec::with_capacity(1);
4649
let initial_chunk_idx = chunks.push(initial_chunk);
4750
let mut magic_string = Self {
4851
intro: Default::default(),
4952
outro: Default::default(),
5053
source,
54+
source_len,
5155
first_chunk_idx: initial_chunk_idx,
5256
last_chunk_idx: initial_chunk_idx,
5357
chunks,
5458
chunk_by_start: Default::default(),
5559
chunk_by_end: Default::default(),
5660
// setup options
5761
filename: options.filename,
62+
indent_str: None,
5863
};
5964

6065
magic_string.chunk_by_start.insert(0, initial_chunk_idx);
6166
magic_string
6267
.chunk_by_end
63-
.insert(magic_string.source.len(), initial_chunk_idx);
68+
.insert(source_len, initial_chunk_idx);
6469

6570
magic_string
6671
}
@@ -201,15 +206,13 @@ impl<'text> MagicString<'text> {
201206
/// Chunk{span: (0, 3)} => "abc"
202207
/// Chunk{span: (3, 7)} => "defg"
203208
fn split_at(&mut self, at_index: u32) {
204-
if at_index == 0
205-
|| at_index >= self.source.len()
206-
|| self.chunk_by_end.contains_key(&at_index)
209+
if at_index == 0 || at_index >= self.source_len || self.chunk_by_end.contains_key(&at_index)
207210
{
208211
return;
209212
}
210213

211214
let (mut candidate, mut candidate_idx, search_right) =
212-
if (self.source.len() - at_index) > at_index {
215+
if (self.source_len - at_index) > at_index {
213216
(self.first_chunk(), self.first_chunk_idx, true)
214217
} else {
215218
(self.last_chunk(), self.last_chunk_idx, false)
@@ -250,7 +253,7 @@ impl<'text> MagicString<'text> {
250253

251254
fn by_start_mut(&mut self, text_index: impl AssertIntoU32) -> Option<&mut Chunk<'text>> {
252255
let text_index = text_index.assert_into_u32();
253-
if text_index == self.source.len() {
256+
if text_index == self.source_len {
254257
None
255258
} else {
256259
self.split_at(text_index);
@@ -298,3 +301,4 @@ impl<'a> Iterator for ChunkIter<'a> {
298301
})
299302
}
300303
}
304+

src/magic_string/source_map.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ impl<'s> MagicString<'s> {
5555
sources: vec!["".to_string()],
5656
sources_content: opts
5757
.include_content
58-
.then(|| vec![self.source.as_str().to_string()])
58+
.then(|| vec![self.source.as_ref().to_string()])
5959
.unwrap_or_default(),
6060
mappings,
6161
names,

0 commit comments

Comments
 (0)