Skip to content

Commit e219a1b

Browse files
authored
Merge pull request #1586 from jeckersb/cmdline-improvements
kernel_cmdline: Refactor parsing to improve quote and whitespace handling
2 parents 6756070 + 4335ba1 commit e219a1b

File tree

1 file changed

+152
-69
lines changed
  • crates/kernel_cmdline/src

1 file changed

+152
-69
lines changed

crates/kernel_cmdline/src/lib.rs

Lines changed: 152 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,22 @@ impl<'a, T: AsRef<[u8]> + ?Sized> From<&'a T> for Cmdline<'a> {
2929
}
3030
}
3131

32+
/// An iterator over kernel command line parameters.
33+
///
34+
/// This is created by the `iter` method on `Cmdline`.
35+
#[derive(Debug)]
36+
pub struct CmdlineIter<'a>(&'a [u8]);
37+
38+
impl<'a> Iterator for CmdlineIter<'a> {
39+
type Item = Parameter<'a>;
40+
41+
fn next(&mut self) -> Option<Self::Item> {
42+
let (param, rest) = Parameter::parse(self.0);
43+
self.0 = rest;
44+
param
45+
}
46+
}
47+
3248
impl<'a> Cmdline<'a> {
3349
/// Reads the kernel command line from `/proc/cmdline`.
3450
///
@@ -42,17 +58,8 @@ impl<'a> Cmdline<'a> {
4258
/// Properly handles quoted values containing whitespace and splits on
4359
/// unquoted whitespace characters. Parameters are parsed as either
4460
/// key-only switches or key=value pairs.
45-
pub fn iter(&'a self) -> impl Iterator<Item = Parameter<'a>> + 'a {
46-
let mut in_quotes = false;
47-
48-
self.0
49-
.split(move |c| {
50-
if *c == b'"' {
51-
in_quotes = !in_quotes;
52-
}
53-
!in_quotes && c.is_ascii_whitespace()
54-
})
55-
.map(Parameter::from)
61+
pub fn iter(&'a self) -> CmdlineIter<'a> {
62+
CmdlineIter(&self.0)
5663
}
5764

5865
/// Locate a kernel argument with the given key name.
@@ -138,24 +145,12 @@ impl<'a> std::ops::Deref for ParameterKey<'a> {
138145
}
139146
}
140147

141-
impl<'a> From<&'a [u8]> for ParameterKey<'a> {
142-
fn from(value: &'a [u8]) -> Self {
143-
Self(value)
144-
}
145-
}
146-
147148
/// A single kernel command line parameter key that is known to be UTF-8.
148149
///
149150
/// Otherwise the same as [`ParameterKey`].
150151
#[derive(Debug, Eq)]
151152
pub struct ParameterKeyStr<'a>(&'a str);
152153

153-
impl<'a> From<&'a str> for ParameterKeyStr<'a> {
154-
fn from(value: &'a str) -> Self {
155-
Self(value)
156-
}
157-
}
158-
159154
/// A single kernel command line parameter.
160155
#[derive(Debug, Eq)]
161156
pub struct Parameter<'a> {
@@ -179,34 +174,41 @@ pub struct ParameterStr<'a> {
179174
}
180175

181176
impl<'a> Parameter<'a> {
182-
/// Convert this parameter to a UTF-8 string parameter, if possible.
177+
/// Attempt to parse a single command line parameter from a slice
178+
/// of bytes.
183179
///
184-
/// Returns `None` if the parameter contains invalid UTF-8.
185-
pub fn to_str(&self) -> Option<ParameterStr<'a>> {
186-
let Ok(parameter) = std::str::from_utf8(self.parameter) else {
187-
return None;
180+
/// The first tuple item contains the parsed parameter, or `None`
181+
/// if a Parameter could not be constructed from the input. This
182+
/// occurs when the input is either empty or contains only
183+
/// whitespace.
184+
///
185+
/// Any remaining bytes not consumed from the input are returned
186+
/// as the second tuple item.
187+
pub fn parse(input: &'a [u8]) -> (Option<Self>, &'a [u8]) {
188+
let input = input.trim_ascii_start();
189+
190+
if input.is_empty() {
191+
return (None, input);
192+
}
193+
194+
let mut in_quotes = false;
195+
let end = input.iter().position(move |c| {
196+
if *c == b'"' {
197+
in_quotes = !in_quotes;
198+
}
199+
!in_quotes && c.is_ascii_whitespace()
200+
});
201+
202+
let end = match end {
203+
Some(end) => end,
204+
None => input.len(),
188205
};
189-
Some(ParameterStr::from(parameter))
190-
}
191-
}
192206

193-
impl<'a> AsRef<str> for ParameterStr<'a> {
194-
fn as_ref(&self) -> &str {
195-
self.parameter
196-
}
197-
}
207+
let (input, rest) = input.split_at(end);
198208

199-
impl<'a, T: AsRef<[u8]> + ?Sized> From<&'a T> for Parameter<'a> {
200-
/// Parses a parameter from raw bytes.
201-
///
202-
/// Splits on the first `=` character to separate key and value.
203-
/// Strips only the outermost pair of double quotes from values.
204-
/// If no `=` is found, treats the entire input as a key-only parameter.
205-
fn from(input: &'a T) -> Self {
206-
let input = input.as_ref();
207209
let equals = input.iter().position(|b| *b == b'=');
208210

209-
match equals {
211+
let ret = match equals {
210212
None => Self {
211213
parameter: input,
212214
key: ParameterKey(input),
@@ -233,7 +235,25 @@ impl<'a, T: AsRef<[u8]> + ?Sized> From<&'a T> for Parameter<'a> {
233235
value: Some(value),
234236
}
235237
}
236-
}
238+
};
239+
240+
(Some(ret), rest)
241+
}
242+
243+
/// Convert this parameter to a UTF-8 string parameter, if possible.
244+
///
245+
/// Returns `None` if the parameter contains invalid UTF-8.
246+
pub fn to_str(&self) -> Option<ParameterStr<'a>> {
247+
let Ok(parameter) = std::str::from_utf8(self.parameter) else {
248+
return None;
249+
};
250+
Some(ParameterStr::from(parameter))
251+
}
252+
}
253+
254+
impl<'a> AsRef<str> for ParameterStr<'a> {
255+
fn as_ref(&self) -> &str {
256+
self.parameter
237257
}
238258
}
239259

@@ -261,7 +281,12 @@ impl PartialEq for ParameterKey<'_> {
261281
}
262282
}
263283

264-
impl<'a> From<&'a str> for ParameterStr<'a> {
284+
impl<'a> ParameterStr<'a> {
285+
/// This intentionally does not implement From<&'a str> to prevent
286+
/// external users from constructing instances of `ParameterStr`.
287+
/// We rely on the fact that it is derived from `Parameter` to
288+
/// ensure that the input has already been split properly and that
289+
/// the input contains only one command line parameter.
265290
fn from(parameter: &'a str) -> Self {
266291
let (key, value) = if let Some((key, value)) = parameter.split_once('=') {
267292
let value = value
@@ -299,34 +324,79 @@ impl<'a> PartialEq for ParameterKeyStr<'a> {
299324
mod tests {
300325
use super::*;
301326

327+
// convenience method for tests
328+
fn param(s: &str) -> Parameter<'_> {
329+
Parameter::parse(s.as_bytes()).0.unwrap()
330+
}
331+
332+
#[test]
333+
fn test_parameter_parse() {
334+
let (p, rest) = Parameter::parse(b"foo");
335+
let p = p.unwrap();
336+
assert_eq!(p.key.0, b"foo");
337+
assert_eq!(p.value, None);
338+
assert_eq!(rest, "".as_bytes());
339+
340+
// should consume one parameter and return the rest of the input
341+
let (p, rest) = Parameter::parse(b"foo=bar baz");
342+
let p = p.unwrap();
343+
assert_eq!(p.key.0, b"foo");
344+
assert_eq!(p.value, Some(b"bar".as_slice()));
345+
assert_eq!(rest, " baz".as_bytes());
346+
347+
// should return None on empty or whitespace inputs
348+
let (p, rest) = Parameter::parse(b"");
349+
assert!(p.is_none());
350+
assert_eq!(rest, b"".as_slice());
351+
let (p, rest) = Parameter::parse(b" ");
352+
assert!(p.is_none());
353+
assert_eq!(rest, b"".as_slice());
354+
}
355+
302356
#[test]
303357
fn test_parameter_simple() {
304-
let switch = Parameter::from("foo");
358+
let switch = param("foo");
305359
assert_eq!(switch.key.0, b"foo");
306360
assert_eq!(switch.value, None);
307361

308-
let kv = Parameter::from("bar=baz");
362+
let kv = param("bar=baz");
309363
assert_eq!(kv.key.0, b"bar");
310364
assert_eq!(kv.value, Some(b"baz".as_slice()));
311365
}
312366

313367
#[test]
314368
fn test_parameter_quoted() {
315-
let p = Parameter::from("foo=\"quoted value\"");
369+
let p = param("foo=\"quoted value\"");
316370
assert_eq!(p.value, Some(b"quoted value".as_slice()));
317371
}
318372

373+
#[test]
374+
fn test_parameter_extra_whitespace() {
375+
let p = param(" foo=bar ");
376+
assert_eq!(p.key.0, b"foo");
377+
assert_eq!(p.value, Some(b"bar".as_slice()));
378+
}
379+
380+
#[test]
381+
fn test_parameter_internal_key_whitespace() {
382+
let (p, rest) = Parameter::parse("foo bar=baz".as_bytes());
383+
let p = p.unwrap();
384+
assert_eq!(p.key.0, b"foo");
385+
assert_eq!(p.value, None);
386+
assert_eq!(rest, b" bar=baz");
387+
}
388+
319389
#[test]
320390
fn test_parameter_pathological() {
321391
// valid things that certified insane people would do
322392

323393
// quotes don't get removed from keys
324-
let p = Parameter::from("\"\"\"");
394+
let p = param("\"\"\"");
325395
assert_eq!(p.key.0, b"\"\"\"");
326396

327397
// quotes only get stripped from the absolute ends of values
328-
let p = Parameter::from("foo=\"internal \" quotes \" are ok\"");
329-
assert_eq!(p.value, Some(b"internal \" quotes \" are ok".as_slice()));
398+
let p = param("foo=\"internal\"quotes\"are\"ok\"");
399+
assert_eq!(p.value, Some(b"internal\"quotes\"are\"ok".as_slice()));
330400

331401
// non-UTF8 things are in fact valid
332402
let non_utf8_byte = b"\xff";
@@ -335,36 +405,37 @@ mod tests {
335405
assert!(failed_conversion.is_err());
336406
let mut p = b"foo=".to_vec();
337407
p.push(non_utf8_byte[0]);
338-
let p = Parameter::from(&p);
408+
let (p, _rest) = Parameter::parse(&p);
409+
let p = p.unwrap();
339410
assert_eq!(p.value, Some(non_utf8_byte.as_slice()));
340411
}
341412

342413
#[test]
343414
fn test_parameter_equality() {
344415
// substrings are not equal
345-
let foo = Parameter::from("foo");
346-
let bar = Parameter::from("foobar");
416+
let foo = param("foo");
417+
let bar = param("foobar");
347418
assert_ne!(foo, bar);
348419
assert_ne!(bar, foo);
349420

350421
// dashes and underscores are treated equally
351-
let dashes = Parameter::from("a-delimited-param");
352-
let underscores = Parameter::from("a_delimited_param");
422+
let dashes = param("a-delimited-param");
423+
let underscores = param("a_delimited_param");
353424
assert_eq!(dashes, underscores);
354425

355426
// same key, same values is equal
356-
let dashes = Parameter::from("a-delimited-param=same_values");
357-
let underscores = Parameter::from("a_delimited_param=same_values");
427+
let dashes = param("a-delimited-param=same_values");
428+
let underscores = param("a_delimited_param=same_values");
358429
assert_eq!(dashes, underscores);
359430

360431
// same key, different values is not equal
361-
let dashes = Parameter::from("a-delimited-param=different_values");
362-
let underscores = Parameter::from("a_delimited_param=DiFfErEnT_valUEZ");
432+
let dashes = param("a-delimited-param=different_values");
433+
let underscores = param("a_delimited_param=DiFfErEnT_valUEZ");
363434
assert_ne!(dashes, underscores);
364435

365436
// mixed variants are never equal
366-
let switch = Parameter::from("same_key");
367-
let keyvalue = Parameter::from("same_key=but_with_a_value");
437+
let switch = param("same_key");
438+
let keyvalue = param("same_key=but_with_a_value");
368439
assert_ne!(switch, keyvalue);
369440
}
370441

@@ -375,9 +446,9 @@ mod tests {
375446
let kargs = Cmdline::from(b"foo=bar,bar2 baz=fuz wiz".as_slice());
376447
let mut iter = kargs.iter();
377448

378-
assert_eq!(iter.next(), Some(Parameter::from(b"foo=bar,bar2")));
379-
assert_eq!(iter.next(), Some(Parameter::from(b"baz=fuz")));
380-
assert_eq!(iter.next(), Some(Parameter::from(b"wiz")));
449+
assert_eq!(iter.next(), Some(param("foo=bar,bar2")));
450+
assert_eq!(iter.next(), Some(param("baz=fuz")));
451+
assert_eq!(iter.next(), Some(param("wiz")));
381452
assert_eq!(iter.next(), None);
382453

383454
// Test the find API
@@ -416,6 +487,17 @@ mod tests {
416487
assert_eq!(p.value.unwrap(), b"2");
417488
}
418489

490+
#[test]
491+
fn test_kargs_extra_whitespace() {
492+
let kargs = Cmdline::from(b" foo=bar baz=fuz wiz ".as_slice());
493+
let mut iter = kargs.iter();
494+
495+
assert_eq!(iter.next(), Some(param("foo=bar")));
496+
assert_eq!(iter.next(), Some(param("baz=fuz")));
497+
assert_eq!(iter.next(), Some(param("wiz")));
498+
assert_eq!(iter.next(), None);
499+
}
500+
419501
#[test]
420502
fn test_value_of() {
421503
let kargs = Cmdline::from(b"foo=bar baz=qux switch".as_slice());
@@ -546,13 +628,14 @@ mod tests {
546628

547629
#[test]
548630
fn test_param_to_str() {
549-
let p = Parameter::from("foo=bar");
631+
let p = param("foo=bar");
550632
let p_str = p.to_str().unwrap();
551633
assert_eq!(p_str, ParameterStr::from("foo=bar"));
552634
let non_utf8_byte = b"\xff";
553635
let mut p_u8 = b"foo=".to_vec();
554636
p_u8.push(non_utf8_byte[0]);
555-
let p = Parameter::from(&p_u8);
637+
let (p, _rest) = Parameter::parse(&p_u8);
638+
let p = p.unwrap();
556639
assert!(p.to_str().is_none());
557640
}
558641

0 commit comments

Comments
 (0)