Skip to content

Commit a64ae89

Browse files
jeckersbcgwalters
authored andcommitted
kernel_cmdline: Consolidate parsing and add iter_bytes/iter_str
This commit refactors the kernel_cmdline parsing implementation to reduce code duplication and provide a clearer separation of concerns. Key changes: 1. Introduced CmdlineIterBytes as a dedicated iterator for splitting the command line into raw parameter byte slices. This centralizes the quote-aware whitespace splitting logic in one place. 2. Refactored CmdlineIter to wrap CmdlineIterBytes instead of duplicating the splitting logic. This eliminates redundant code and ensures consistent behavior. 3. Consolidated Parameter::parse and Parameter::parse_one into a single parse() method. Parameter::parse now uses CmdlineIterBytes directly to find the first parameter, then passes it to parse_internal() which assumes the input is already a single parameter. 4. Added iter_bytes() and iter_str() methods to Cmdline for cases where users need raw byte slices or strings without full Parameter parsing overhead. 5. Removed utf8::Parameter::parse_one() in favor of a simplified parse() that wraps the bytes implementation. Assisted-by: Claude Code (Sonnet 4.5) Signed-off-by: John Eckersberg <[email protected]>
1 parent 0efad6a commit a64ae89

File tree

2 files changed

+152
-115
lines changed

2 files changed

+152
-115
lines changed

crates/kernel_cmdline/src/bytes.rs

Lines changed: 116 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -38,15 +38,46 @@ impl<'a> From<Vec<u8>> for Cmdline<'a> {
3838
///
3939
/// This is created by the `iter` method on `Cmdline`.
4040
#[derive(Debug)]
41-
pub struct CmdlineIter<'a>(&'a [u8]);
41+
pub struct CmdlineIter<'a>(CmdlineIterBytes<'a>);
4242

4343
impl<'a> Iterator for CmdlineIter<'a> {
4444
type Item = Parameter<'a>;
4545

4646
fn next(&mut self) -> Option<Self::Item> {
47-
let (param, rest) = Parameter::parse_one(self.0);
47+
self.0.next().and_then(Parameter::parse_internal)
48+
}
49+
}
50+
51+
/// An iterator over kernel command line parameters as byte slices.
52+
///
53+
/// This is created by the `iter_bytes` method on `Cmdline`.
54+
#[derive(Debug)]
55+
pub struct CmdlineIterBytes<'a>(&'a [u8]);
56+
57+
impl<'a> Iterator for CmdlineIterBytes<'a> {
58+
type Item = &'a [u8];
59+
60+
fn next(&mut self) -> Option<Self::Item> {
61+
let input = self.0.trim_ascii_start();
62+
63+
if input.is_empty() {
64+
self.0 = input;
65+
return None;
66+
}
67+
68+
let mut in_quotes = false;
69+
let end = input.iter().position(move |c| {
70+
if *c == b'"' {
71+
in_quotes = !in_quotes;
72+
}
73+
!in_quotes && c.is_ascii_whitespace()
74+
});
75+
76+
let end = end.unwrap_or(input.len());
77+
let (param, rest) = input.split_at(end);
4878
self.0 = rest;
49-
param
79+
80+
Some(param)
5081
}
5182
}
5283

@@ -71,7 +102,15 @@ impl<'a> Cmdline<'a> {
71102
/// unquoted whitespace characters. Parameters are parsed as either
72103
/// key-only switches or key=value pairs.
73104
pub fn iter(&'a self) -> CmdlineIter<'a> {
74-
CmdlineIter(&self.0)
105+
CmdlineIter(self.iter_bytes())
106+
}
107+
108+
/// Returns an iterator over all parameters in the command line as byte slices.
109+
///
110+
/// This is similar to `iter()` but yields `&[u8]` directly instead of `Parameter`,
111+
/// which can be more convenient when you just need the raw byte representation.
112+
pub fn iter_bytes(&self) -> CmdlineIterBytes<'_> {
113+
CmdlineIterBytes(&self.0)
75114
}
76115

77116
/// Returns an iterator over all parameters in the command line
@@ -369,51 +408,27 @@ impl<'a> Parameter<'a> {
369408
/// be constructed from the input. This occurs when the input is
370409
/// either empty or contains only whitespace.
371410
///
372-
/// Any remaining bytes not consumed from the input are discarded.
411+
/// If the input contains multiple parameters, only the first one
412+
/// is parsed and the rest is discarded.
373413
pub fn parse<T: AsRef<[u8]> + ?Sized>(input: &'a T) -> Option<Self> {
374-
Self::parse_one(input).0
414+
CmdlineIterBytes(input.as_ref())
415+
.next()
416+
.and_then(Self::parse_internal)
375417
}
376418

377-
/// Attempt to parse a single command line parameter from a slice
378-
/// of bytes.
379-
///
380-
/// The first tuple item contains the parsed parameter, or `None`
381-
/// if a Parameter could not be constructed from the input. This
382-
/// occurs when the input is either empty or contains only
383-
/// whitespace.
419+
/// Parse a parameter from a byte slice that contains exactly one parameter.
384420
///
385-
/// Any remaining bytes not consumed from the input are returned
386-
/// as the second tuple item.
387-
pub fn parse_one<T: AsRef<[u8]> + ?Sized>(input: &'a T) -> (Option<Self>, &'a [u8]) {
388-
let input = input.as_ref().trim_ascii_start();
389-
390-
if input.is_empty() {
391-
return (None, input);
392-
}
393-
394-
let mut in_quotes = false;
395-
let end = input.iter().position(move |c| {
396-
if *c == b'"' {
397-
in_quotes = !in_quotes;
398-
}
399-
!in_quotes && c.is_ascii_whitespace()
400-
});
401-
402-
let end = match end {
403-
Some(end) => end,
404-
None => input.len(),
405-
};
406-
407-
let (input, rest) = input.split_at(end);
408-
421+
/// This is an internal method that assumes the input has already been
422+
/// split into a single parameter (e.g., by CmdlineIterBytes).
423+
fn parse_internal(input: &'a [u8]) -> Option<Self> {
409424
let equals = input.iter().position(|b| *b == b'=');
410425

411-
let ret = match equals {
412-
None => Self {
426+
match equals {
427+
None => Some(Self {
413428
parameter: input,
414429
key: ParameterKey(input),
415430
value: None,
416-
},
431+
}),
417432
Some(i) => {
418433
let (key, mut value) = input.split_at(i);
419434
let key = ParameterKey(key);
@@ -428,15 +443,13 @@ impl<'a> Parameter<'a> {
428443
v.strip_suffix(b"\"").unwrap_or(v)
429444
};
430445

431-
Self {
446+
Some(Self {
432447
parameter: input,
433448
key,
434449
value: Some(value),
435-
}
450+
})
436451
}
437-
};
438-
439-
(Some(ret), rest)
452+
}
440453
}
441454

442455
/// Returns the key part of the parameter
@@ -479,27 +492,19 @@ mod tests {
479492
}
480493

481494
#[test]
482-
fn test_parameter_parse_one() {
483-
let (p, rest) = Parameter::parse_one(b"foo");
484-
let p = p.unwrap();
495+
fn test_parameter_parse() {
496+
let p = Parameter::parse(b"foo").unwrap();
485497
assert_eq!(p.key.0, b"foo");
486498
assert_eq!(p.value, None);
487-
assert_eq!(rest, "".as_bytes());
488499

489-
// should consume one parameter and return the rest of the input
490-
let (p, rest) = Parameter::parse_one(b"foo=bar baz");
491-
let p = p.unwrap();
500+
// should parse only the first parameter and discard the rest of the input
501+
let p = Parameter::parse(b"foo=bar baz").unwrap();
492502
assert_eq!(p.key.0, b"foo");
493503
assert_eq!(p.value, Some(b"bar".as_slice()));
494-
assert_eq!(rest, " baz".as_bytes());
495504

496505
// should return None on empty or whitespace inputs
497-
let (p, rest) = Parameter::parse_one(b"");
498-
assert!(p.is_none());
499-
assert_eq!(rest, b"".as_slice());
500-
let (p, rest) = Parameter::parse_one(b" ");
501-
assert!(p.is_none());
502-
assert_eq!(rest, b"".as_slice());
506+
assert!(Parameter::parse(b"").is_none());
507+
assert!(Parameter::parse(b" ").is_none());
503508
}
504509

505510
#[test]
@@ -534,11 +539,10 @@ mod tests {
534539

535540
#[test]
536541
fn test_parameter_internal_key_whitespace() {
537-
let (p, rest) = Parameter::parse_one("foo bar=baz".as_bytes());
538-
let p = p.unwrap();
542+
// parse should only consume the first parameter
543+
let p = Parameter::parse("foo bar=baz".as_bytes()).unwrap();
539544
assert_eq!(p.key.0, b"foo");
540545
assert_eq!(p.value, None);
541-
assert_eq!(rest, b" bar=baz");
542546
}
543547

544548
#[test]
@@ -923,4 +927,54 @@ mod tests {
923927
assert_eq!(params[1], param("baz=qux"));
924928
assert_eq!(params[2], param("wiz"));
925929
}
930+
931+
#[test]
932+
fn test_iter_bytes_simple() {
933+
let kargs = Cmdline::from(b"foo bar baz");
934+
let params: Vec<_> = kargs.iter_bytes().collect();
935+
936+
assert_eq!(params.len(), 3);
937+
assert_eq!(params[0], b"foo");
938+
assert_eq!(params[1], b"bar");
939+
assert_eq!(params[2], b"baz");
940+
}
941+
942+
#[test]
943+
fn test_iter_bytes_with_values() {
944+
let kargs = Cmdline::from(b"foo=bar baz=qux wiz");
945+
let params: Vec<_> = kargs.iter_bytes().collect();
946+
947+
assert_eq!(params.len(), 3);
948+
assert_eq!(params[0], b"foo=bar");
949+
assert_eq!(params[1], b"baz=qux");
950+
assert_eq!(params[2], b"wiz");
951+
}
952+
953+
#[test]
954+
fn test_iter_bytes_with_quotes() {
955+
let kargs = Cmdline::from(b"foo=\"bar baz\" qux");
956+
let params: Vec<_> = kargs.iter_bytes().collect();
957+
958+
assert_eq!(params.len(), 2);
959+
assert_eq!(params[0], b"foo=\"bar baz\"");
960+
assert_eq!(params[1], b"qux");
961+
}
962+
963+
#[test]
964+
fn test_iter_bytes_extra_whitespace() {
965+
let kargs = Cmdline::from(b" foo bar ");
966+
let params: Vec<_> = kargs.iter_bytes().collect();
967+
968+
assert_eq!(params.len(), 2);
969+
assert_eq!(params[0], b"foo");
970+
assert_eq!(params[1], b"bar");
971+
}
972+
973+
#[test]
974+
fn test_iter_bytes_empty() {
975+
let kargs = Cmdline::from(b"");
976+
let params: Vec<_> = kargs.iter_bytes().collect();
977+
978+
assert_eq!(params.len(), 0);
979+
}
926980
}

crates/kernel_cmdline/src/utf8.rs

Lines changed: 36 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,25 @@ impl<'a> Iterator for CmdlineIter<'a> {
4949
}
5050
}
5151

52+
/// An iterator over UTF-8 kernel command line parameters as string slices.
53+
///
54+
/// This is created by the `iter_str` method on `Cmdline`.
55+
#[derive(Debug)]
56+
pub struct CmdlineIterStr<'a>(bytes::CmdlineIterBytes<'a>);
57+
58+
impl<'a> Iterator for CmdlineIterStr<'a> {
59+
type Item = &'a str;
60+
61+
fn next(&mut self) -> Option<Self::Item> {
62+
// Get the next byte slice from the underlying iterator
63+
let bytes = self.0.next()?;
64+
65+
// Convert to UTF-8 string slice
66+
// SAFETY: We know this is valid UTF-8 since the Cmdline was constructed from valid UTF-8
67+
Some(str::from_utf8(bytes).expect("Parameter bytes come from valid UTF-8 cmdline"))
68+
}
69+
}
70+
5271
impl<'a> Cmdline<'a> {
5372
/// Creates a new empty owned `Cmdline`.
5473
///
@@ -84,6 +103,14 @@ impl<'a> Cmdline<'a> {
84103
CmdlineIter(self.0.iter())
85104
}
86105

106+
/// Returns an iterator over all parameters in the command line as string slices.
107+
///
108+
/// This is similar to `iter()` but yields `&str` directly instead of `Parameter`,
109+
/// which can be more convenient when you just need the string representation.
110+
pub fn iter_str(&self) -> CmdlineIterStr<'_> {
111+
CmdlineIterStr(self.0.iter_bytes())
112+
}
113+
87114
/// Locate a kernel argument with the given key name.
88115
///
89116
/// Returns the first parameter matching the given key, or `None` if not found.
@@ -283,36 +310,8 @@ impl<'a> Parameter<'a> {
283310
/// Returns `Some(Parameter)`, or `None` if a Parameter could not
284311
/// be constructed from the input. This occurs when the input is
285312
/// either empty or contains only whitespace.
286-
///
287-
/// Any remaining characters not consumed from the input are
288-
/// discarded.
289313
pub fn parse<T: AsRef<str> + ?Sized>(input: &'a T) -> Option<Self> {
290-
Self::parse_one(input).0
291-
}
292-
293-
/// Attempt to parse a single command line parameter from a UTF-8
294-
/// string.
295-
///
296-
/// The first tuple item contains the parsed parameter, or `None`
297-
/// if a Parameter could not be constructed from the input. This
298-
/// occurs when the input is either empty or contains only
299-
/// whitespace.
300-
///
301-
/// Any remaining characters not consumed from the input are
302-
/// returned as the second tuple item.
303-
pub fn parse_one<T: AsRef<str> + ?Sized>(input: &'a T) -> (Option<Self>, &'a str) {
304-
let (bytes, rest) = bytes::Parameter::parse_one(input.as_ref().as_bytes());
305-
306-
// SAFETY: we know this is valid UTF-8 since input is &str,
307-
// and `rest` is a subslice of that &str which was split on
308-
// whitespace
309-
let rest = str::from_utf8(rest)
310-
.expect("Splitting UTF-8 on ascii whitespace cannot produce invalid UTF-8 substrings");
311-
312-
match bytes {
313-
Some(p) => (Some(Self(p)), rest),
314-
None => (None, rest),
315-
}
314+
bytes::Parameter::parse(input.as_ref().as_bytes()).map(Self)
316315
}
317316

318317
/// Construct a utf8::Parameter from a bytes::Parameter
@@ -336,13 +335,6 @@ impl<'a> Parameter<'a> {
336335
str::from_utf8(p).expect("We only construct the underlying bytes from valid UTF-8")
337336
})
338337
}
339-
340-
/// Returns the parameter as a &str
341-
pub fn as_str(&'a self) -> &'a str {
342-
// SAFETY: We know this is valid UTF-8 since we only
343-
// construct the underlying `bytes` from valid UTF-8
344-
str::from_utf8(&self.0).expect("We only construct the underlying bytes from valid UTF-8")
345-
}
346338
}
347339

348340
impl<'a> TryFrom<bytes::Parameter<'a>> for Parameter<'a> {
@@ -404,27 +396,19 @@ mod tests {
404396
}
405397

406398
#[test]
407-
fn test_parameter_parse_one() {
408-
let (p, rest) = Parameter::parse_one("foo");
409-
let p = p.unwrap();
399+
fn test_parameter_parse() {
400+
let p = Parameter::parse("foo").unwrap();
410401
assert_eq!(p.key(), "foo".into());
411402
assert_eq!(p.value(), None);
412-
assert_eq!(rest, "");
413403

414-
// should consume one parameter and return the rest of the input
415-
let (p, rest) = Parameter::parse_one("foo=bar baz");
416-
let p = p.unwrap();
404+
// should parse only the first parameter and discard the rest of the input
405+
let p = Parameter::parse("foo=bar baz").unwrap();
417406
assert_eq!(p.key(), "foo".into());
418407
assert_eq!(p.value(), Some("bar"));
419-
assert_eq!(rest, " baz");
420408

421409
// should return None on empty or whitespace inputs
422-
let (p, rest) = Parameter::parse_one("");
423-
assert!(p.is_none());
424-
assert_eq!(rest, "");
425-
let (p, rest) = Parameter::parse_one(" ");
426-
assert!(p.is_none());
427-
assert_eq!(rest, "");
410+
assert!(Parameter::parse("").is_none());
411+
assert!(Parameter::parse(" ").is_none());
428412
}
429413

430414
#[test]
@@ -453,11 +437,10 @@ mod tests {
453437

454438
#[test]
455439
fn test_parameter_internal_key_whitespace() {
456-
let (p, rest) = Parameter::parse_one("foo bar=baz");
457-
let p = p.unwrap();
440+
// parse should only consume the first parameter
441+
let p = Parameter::parse("foo bar=baz").unwrap();
458442
assert_eq!(p.key(), "foo".into());
459443
assert_eq!(p.value(), None);
460-
assert_eq!(rest, " bar=baz");
461444
}
462445

463446
#[test]

0 commit comments

Comments
 (0)