Skip to content

Commit 88b3915

Browse files
authored
fix(assembly-syntax): properly handle quoted path components (#2618)
Prior to this commit, when constructing PathBufs from a string containing quoted components would improperly strip the quotes from those components, which would result in later iteration over components of that path treating that part of the path as unquoted. For example, given the following path string: `root:root_ns@1.0.0::module::"function::with::quoted::symbol_name"` Constructing a PathBuf from this string would succeed as expected, but would encode the path internally as if it had been passed as: `root:root_ns@1.0.0::module::function::with::quoted::symbol_name` If we later iterate over components of the path, the part that should have been treated as a single component, i.e. `"function::with::quoted::symbol_name`, would instead be treated as multiple components, e.g. `function`, `with`, `quoted`, and `symbol_name`. This commit changes how we handle quoted components in the Components iterator, so that the quotes are preserved. Additionally, a new `PathBuf::push_component` method was added, which allows one to explicitly push a string as a quoted component, ensuring the component is quoted if not already. A regression test was added that covers the case where this was discovered, and further ensures that constructing a PathBuf from a single string, or building it up via `push_component` produce identical results. A related issue was also identified and fixed, namely that internally the Components iterator was not consuming trailing/leading path separators after parsing a quoted component, causing unexpected results when using `Components::as_path`. This has been fixed, making the behavior consistent across both quoted and unquoted identifiers.
1 parent 3681a89 commit 88b3915

File tree

4 files changed

+278
-60
lines changed

4 files changed

+278
-60
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# Changelog
22

3+
## 0.20.4 (Unreleased)
4+
5+
- Fixed issue with handling of quoted components in `PathBuf` [#2618](https://github.com/0xMiden/miden-vm/pull/2618)
6+
37
## 0.20.3 (2026-01-27)
48

59
- Fixed issue where exports of a Library did not have attributes serialized [#2608](https://github.com/0xMiden/miden-vm/issues/2608)

crates/assembly-syntax/src/ast/path/components.rs

Lines changed: 126 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,16 @@ impl<'a> PathComponent<'a> {
4848
Self::Normal(id) => id.chars().count(),
4949
}
5050
}
51+
52+
/// Returns true if this path component is a quoted string
53+
pub fn is_quoted(&self) -> bool {
54+
matches!(self, Self::Normal(component) if component.starts_with('"') && component.ends_with('"'))
55+
}
56+
57+
/// Returns true if this path component requires quoting when displayed/stored as a string
58+
pub fn requires_quoting(&self) -> bool {
59+
matches!(self, Self::Normal(component) if component.contains("::"))
60+
}
5161
}
5262

5363
impl PartialEq<str> for PathComponent<'_> {
@@ -72,17 +82,25 @@ impl fmt::Display for PathComponent<'_> {
7282
/// Returns an iterator over the path components represented in the provided source.
7383
///
7484
/// A path consists of at list of components separated by `::` delimiter. A path must contain
75-
/// at least one component.
85+
/// at least one component. Path components may be quoted, in which `::` delimiters are ignored.
86+
/// Quoted path components may also contain characters that are not otherwise valid identifiers in
87+
/// Miden Assembly.
88+
///
89+
/// Note that quoted components may not contain nested quotes - the appearance of a nested quote in
90+
/// a quoted component will be treated as a closing quote resulting in unexpected behavior or
91+
/// validation errors as a result.
7692
///
7793
/// # Errors
7894
///
7995
/// Returns an error if:
8096
///
8197
/// * The path is empty.
8298
/// * Any component of the path is empty.
83-
/// * Any component is not a valid identifier (quoted or unquoted) in Miden Assembly syntax, i.e.
84-
/// starts with an ASCII alphabetic character, contains only printable ASCII characters, except
85-
/// for `::`, which must only be used as a path separator.
99+
/// * Any quoted component is missing a closing/opening quote (depending on order of iteration)
100+
/// * Any unquoted component is not a valid identifier (quoted or unquoted) in Miden Assembly
101+
/// syntax, i.e. starts with an ASCII alphabetic character, contains only printable ASCII
102+
/// characters, except for `::`, which must only be used as a path separator.
103+
#[derive(Debug)]
86104
pub struct Iter<'a> {
87105
components: Components<'a>,
88106
}
@@ -92,7 +110,10 @@ impl<'a> Iter<'a> {
92110
Self {
93111
components: Components {
94112
path,
113+
original: path,
114+
front_pos: 0,
95115
front: State::Start,
116+
back_pos: path.len(),
96117
back: State::Body,
97118
},
98119
}
@@ -139,12 +160,16 @@ impl<'a> DoubleEndedIterator for Iter<'a> {
139160
}
140161

141162
/// The underlying path component iterator used by [Iter]
163+
#[derive(Debug)]
142164
struct Components<'a> {
165+
original: &'a str,
143166
/// The path left to parse components from
144167
path: &'a str,
145168
// To support double-ended iteration, these states keep tack of what has been produced from
146169
// each end
170+
front_pos: usize,
147171
front: State,
172+
back_pos: usize,
148173
back: State,
149174
}
150175

@@ -155,9 +180,9 @@ enum State {
155180
// We're parsing components of the path
156181
Body,
157182
// We've started parsing a quoted component
158-
QuoteOpened,
183+
QuoteOpened(usize),
159184
// We've parsed a quoted component
160-
QuoteClosed,
185+
QuoteClosed(usize),
161186
// We're at the end of the path
162187
Done,
163188
}
@@ -167,7 +192,7 @@ impl<'a> Components<'a> {
167192
match (self.front, self.back) {
168193
(State::Done, _) => true,
169194
(_, State::Done) => true,
170-
(State::Body | State::QuoteOpened | State::QuoteClosed, State::Start) => true,
195+
(State::Body | State::QuoteOpened(_) | State::QuoteClosed(_), State::Start) => true,
171196
(..) => false,
172197
}
173198
}
@@ -177,12 +202,17 @@ impl<'a> Iterator for Components<'a> {
177202
type Item = Result<PathComponent<'a>, PathError>;
178203

179204
fn next(&mut self) -> Option<Self::Item> {
180-
while !self.finished() {
205+
// This is used when consuming a quoted item, to hold the result of the QuoteOpened state
206+
// until we've finished transitioning through the QuoteClosed state. It is never used
207+
// otherwise.
208+
let mut quote_opened = None;
209+
while !self.finished() || quote_opened.is_some() {
181210
match self.front {
182211
State::Start => match self.path.strip_prefix("::") {
183212
Some(rest) => {
184213
self.path = rest;
185214
self.front = State::Body;
215+
self.front_pos += 2;
186216
return Some(Ok(PathComponent::Root));
187217
},
188218
None if self.path.starts_with(Path::KERNEL_PATH)
@@ -197,18 +227,21 @@ impl<'a> Iterator for Components<'a> {
197227
},
198228
State::Body => {
199229
if let Some(rest) = self.path.strip_prefix('"') {
200-
self.front = State::QuoteOpened;
230+
self.front = State::QuoteOpened(self.front_pos);
231+
self.front_pos += 1;
201232
self.path = rest;
202233
continue;
203234
}
204235
match self.path.split_once("::") {
205236
Some(("", rest)) => {
206237
self.path = rest;
238+
self.front_pos += 2;
207239
return Some(Err(PathError::InvalidComponent(
208240
crate::ast::IdentError::Empty,
209241
)));
210242
},
211243
Some((component, rest)) => {
244+
self.front_pos += component.len() + 2;
212245
if rest.is_empty() {
213246
self.path = "::";
214247
} else {
@@ -233,40 +266,55 @@ impl<'a> Iterator for Components<'a> {
233266
{
234267
return Some(Err(err));
235268
}
269+
self.front_pos += component.len();
236270
return Some(Ok(PathComponent::Normal(component)));
237271
},
238272
}
239273
},
240-
State::QuoteOpened => match self.path.split_once('"') {
274+
State::QuoteOpened(opened_at) => match self.path.split_once('"') {
241275
Some(("", rest)) => {
242276
self.path = rest;
243-
self.front = State::QuoteClosed;
244-
return Some(Err(PathError::EmptyComponent));
277+
self.front = State::QuoteClosed(self.front_pos);
278+
self.front_pos += 1;
279+
quote_opened = Some(Err(PathError::EmptyComponent));
245280
},
246281
Some((quoted, rest)) => {
247282
self.path = rest;
248-
self.front = State::QuoteClosed;
249-
return Some(Ok(PathComponent::Normal(quoted)));
283+
self.front_pos += quoted.len();
284+
self.front = State::QuoteClosed(self.front_pos);
285+
self.front_pos += 1;
286+
let quoted = &self.original[opened_at..self.front_pos];
287+
quote_opened = Some(Ok(PathComponent::Normal(quoted)));
250288
},
251289
None => {
252290
self.front = State::Done;
291+
self.front_pos += self.path.len();
253292
return Some(Err(PathError::UnclosedQuotedComponent));
254293
},
255294
},
256-
State::QuoteClosed => {
295+
State::QuoteClosed(_) => {
257296
if self.path.is_empty() {
258297
self.front = State::Done;
259-
continue;
298+
} else {
299+
match self.path.strip_prefix("::") {
300+
Some(rest) => {
301+
self.path = rest;
302+
self.front = State::Body;
303+
self.front_pos += 2;
304+
},
305+
// If we would raise an error, but we have a quoted component to return
306+
// first, leave the state untouched, return the quoted component, and we
307+
// will return here on the next call to `next_back`
308+
None if quote_opened.is_some() => (),
309+
None => {
310+
self.front = State::Done;
311+
return Some(Err(PathError::MissingPathSeparator));
312+
},
313+
}
260314
}
261-
match self.path.strip_prefix("::") {
262-
Some(rest) => {
263-
self.path = rest;
264-
self.front = State::Body;
265-
},
266-
None => {
267-
self.front = State::Done;
268-
return Some(Err(PathError::MissingPathSeparator));
269-
},
315+
316+
if quote_opened.is_some() {
317+
return quote_opened;
270318
}
271319
},
272320
State::Done => break,
@@ -279,13 +327,20 @@ impl<'a> Iterator for Components<'a> {
279327

280328
impl<'a> DoubleEndedIterator for Components<'a> {
281329
fn next_back(&mut self) -> Option<Self::Item> {
282-
while !self.finished() {
330+
// This is used when consuming a quoted item, to hold the result of the QuoteClosed state
331+
// until we've finished transitioning through the QuoteOpened state. It is never used
332+
// otherwise.
333+
let mut quote_closed = None;
334+
while !self.finished() || quote_closed.is_some() {
283335
match self.back {
284336
State::Start => {
285337
self.back = State::Done;
286338
match self.path {
287339
"" => break,
288-
"::" => return Some(Ok(PathComponent::Root)),
340+
"::" => {
341+
self.back_pos = 0;
342+
return Some(Ok(PathComponent::Root));
343+
},
289344
other => {
290345
assert!(
291346
other.starts_with(Path::KERNEL_PATH)
@@ -298,16 +353,19 @@ impl<'a> DoubleEndedIterator for Components<'a> {
298353
},
299354
State::Body => {
300355
if let Some(rest) = self.path.strip_suffix('"') {
301-
self.back = State::QuoteClosed;
356+
self.back = State::QuoteClosed(self.back_pos);
357+
self.back_pos -= 1;
302358
self.path = rest;
303359
continue;
304360
}
305361
match self.path.rsplit_once("::") {
306362
Some(("", "")) => {
307363
self.back = State::Start;
364+
self.back_pos -= 2;
308365
continue;
309366
},
310367
Some((prefix, component)) => {
368+
self.back_pos -= component.len() + 2;
311369
if prefix.is_empty() {
312370
self.path = "::";
313371
self.back = State::Start;
@@ -334,6 +392,7 @@ impl<'a> DoubleEndedIterator for Components<'a> {
334392
} else {
335393
self.path = "";
336394
}
395+
self.back_pos = 0;
337396
if let Err(err) =
338397
Ident::validate(component).map_err(PathError::InvalidComponent)
339398
{
@@ -343,38 +402,52 @@ impl<'a> DoubleEndedIterator for Components<'a> {
343402
},
344403
}
345404
},
346-
State::QuoteOpened => {
405+
State::QuoteOpened(_) => {
347406
if self.path.is_empty() {
348407
self.back = State::Start;
349-
continue;
408+
} else {
409+
match self.path.strip_suffix("::") {
410+
Some("") => {
411+
self.back = State::Start;
412+
self.back_pos -= 2;
413+
},
414+
Some(rest) => {
415+
self.back_pos -= 2;
416+
self.path = rest;
417+
self.back = State::Body;
418+
},
419+
// If we would raise an error, but we have a quoted component to return
420+
// first, leave the state untouched, return the quoted component, and we
421+
// will return here on the next call to `next_back`
422+
None if quote_closed.is_some() => (),
423+
None => {
424+
self.back = State::Done;
425+
return Some(Err(PathError::MissingPathSeparator));
426+
},
427+
}
350428
}
351-
match self.path.strip_suffix("::") {
352-
Some("") => {
353-
self.back = State::Start;
354-
},
355-
Some(rest) => {
356-
self.path = rest;
357-
self.back = State::Body;
358-
},
359-
None => {
360-
self.back = State::Done;
361-
return Some(Err(PathError::MissingPathSeparator));
362-
},
429+
430+
if quote_closed.is_some() {
431+
return quote_closed;
363432
}
364433
},
365-
State::QuoteClosed => match self.path.rsplit_once('"') {
434+
State::QuoteClosed(closed_at) => match self.path.rsplit_once('"') {
366435
Some((rest, "")) => {
436+
self.back_pos -= 1;
367437
self.path = rest;
368-
self.back = State::QuoteOpened;
369-
return Some(Err(PathError::EmptyComponent));
438+
self.back = State::QuoteOpened(self.back_pos);
439+
quote_closed = Some(Err(PathError::EmptyComponent));
370440
},
371441
Some((rest, quoted)) => {
442+
self.back_pos -= quoted.len() + 1;
443+
let quoted = &self.original[self.back_pos..closed_at];
372444
self.path = rest;
373-
self.back = State::QuoteOpened;
374-
return Some(Ok(PathComponent::Normal(quoted)));
445+
self.back = State::QuoteOpened(self.back_pos);
446+
quote_closed = Some(Ok(PathComponent::Normal(quoted)));
375447
},
376448
None => {
377449
self.back = State::Done;
450+
self.back_pos = 0;
378451
return Some(Err(PathError::UnclosedQuotedComponent));
379452
},
380453
},
@@ -518,29 +591,29 @@ mod tests {
518591
#[test]
519592
fn path_with_quoted_component() {
520593
let mut components = Iter::new("\"foo\"");
521-
assert_matches!(components.next(), Some(Ok(PathComponent::Normal("foo"))));
594+
assert_matches!(components.next(), Some(Ok(PathComponent::Normal("\"foo\""))));
522595
assert_matches!(components.next(), None);
523596
}
524597

525598
#[test]
526599
fn path_with_quoted_component_back() {
527600
let mut components = Iter::new("\"foo\"");
528-
assert_matches!(components.next_back(), Some(Ok(PathComponent::Normal("foo"))));
601+
assert_matches!(components.next_back(), Some(Ok(PathComponent::Normal("\"foo\""))));
529602
assert_matches!(components.next_back(), None);
530603
}
531604

532605
#[test]
533606
fn nested_path_with_quoted_component() {
534607
let mut components = Iter::new("foo::\"bar\"");
535608
assert_matches!(components.next(), Some(Ok(PathComponent::Normal("foo"))));
536-
assert_matches!(components.next(), Some(Ok(PathComponent::Normal("bar"))));
609+
assert_matches!(components.next(), Some(Ok(PathComponent::Normal("\"bar\""))));
537610
assert_matches!(components.next(), None);
538611
}
539612

540613
#[test]
541614
fn nested_path_with_quoted_component_back() {
542615
let mut components = Iter::new("foo::\"bar\"");
543-
assert_matches!(components.next_back(), Some(Ok(PathComponent::Normal("bar"))));
616+
assert_matches!(components.next_back(), Some(Ok(PathComponent::Normal("\"bar\""))));
544617
assert_matches!(components.next_back(), Some(Ok(PathComponent::Normal("foo"))));
545618
assert_matches!(components.next_back(), None);
546619
}
@@ -549,7 +622,7 @@ mod tests {
549622
fn nested_path_with_interspersed_quoted_component() {
550623
let mut components = Iter::new("foo::\"bar\"::baz");
551624
assert_matches!(components.next(), Some(Ok(PathComponent::Normal("foo"))));
552-
assert_matches!(components.next(), Some(Ok(PathComponent::Normal("bar"))));
625+
assert_matches!(components.next(), Some(Ok(PathComponent::Normal("\"bar\""))));
553626
assert_matches!(components.next(), Some(Ok(PathComponent::Normal("baz"))));
554627
assert_matches!(components.next(), None);
555628
}
@@ -558,7 +631,7 @@ mod tests {
558631
fn nested_path_with_interspersed_quoted_component_back() {
559632
let mut components = Iter::new("foo::\"bar\"::baz");
560633
assert_matches!(components.next_back(), Some(Ok(PathComponent::Normal("baz"))));
561-
assert_matches!(components.next_back(), Some(Ok(PathComponent::Normal("bar"))));
634+
assert_matches!(components.next_back(), Some(Ok(PathComponent::Normal("\"bar\""))));
562635
assert_matches!(components.next_back(), Some(Ok(PathComponent::Normal("foo"))));
563636
assert_matches!(components.next_back(), None);
564637
}

0 commit comments

Comments
 (0)