Skip to content

Commit db1ed4c

Browse files
committed
core: improve OsStr(ing) helpers
This adds the `os_str_as_bytes_lossy` function, for when we want infallible conversion across platforms, and improves the doc comments of similar functions to be more accurate and better formatted.
1 parent 43229ae commit db1ed4c

File tree

2 files changed

+35
-38
lines changed

2 files changed

+35
-38
lines changed

src/uucore/src/lib/features/quoting_style.rs

Lines changed: 6 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,6 @@
88
use std::char::from_digit;
99
use std::ffi::{OsStr, OsString};
1010
use std::fmt;
11-
#[cfg(unix)]
12-
use std::os::unix::ffi::{OsStrExt, OsStringExt};
1311

1412
// These are characters with special meaning in the shell (e.g. bash).
1513
// The first const contains characters that only have a special meaning when they appear at the beginning of a name.
@@ -462,36 +460,18 @@ fn escape_name_inner(name: &[u8], style: &QuotingStyle, dirname: bool) -> Vec<u8
462460

463461
/// Escape a filename with respect to the given style.
464462
pub fn escape_name(name: &OsStr, style: &QuotingStyle) -> OsString {
465-
#[cfg(unix)]
466-
{
467-
let name = name.as_bytes();
468-
OsStringExt::from_vec(escape_name_inner(name, style, false))
469-
}
470-
#[cfg(not(unix))]
471-
{
472-
let name = name.to_string_lossy();
473-
String::from_utf8_lossy(&escape_name_inner(name.as_bytes(), style, false))
474-
.to_string()
475-
.into()
476-
}
463+
let name = crate::os_str_as_bytes_lossy(name);
464+
crate::os_string_from_vec(escape_name_inner(&name, style, false))
465+
.expect("all byte sequences should be valid for platform, or already replaced in name")
477466
}
478467

479468
/// Escape a directory name with respect to the given style.
480469
/// This is mainly meant to be used for ls' directory name printing and is not
481470
/// likely to be used elsewhere.
482471
pub fn escape_dir_name(dir_name: &OsStr, style: &QuotingStyle) -> OsString {
483-
#[cfg(unix)]
484-
{
485-
let name = dir_name.as_bytes();
486-
OsStringExt::from_vec(escape_name_inner(name, style, true))
487-
}
488-
#[cfg(not(unix))]
489-
{
490-
let name = dir_name.to_string_lossy();
491-
String::from_utf8_lossy(&escape_name_inner(name.as_bytes(), style, true))
492-
.to_string()
493-
.into()
494-
}
472+
let name = crate::os_str_as_bytes_lossy(dir_name);
473+
crate::os_string_from_vec(escape_name_inner(&name, style, true))
474+
.expect("all byte sequences should be valid for platform, or already replaced in name")
495475
}
496476

497477
impl fmt::Display for QuotingStyle {

src/uucore/src/lib/lib.rs

Lines changed: 29 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -253,9 +253,10 @@ pub fn read_yes() -> bool {
253253
}
254254
}
255255

256-
/// Helper function for processing delimiter values (which could be non UTF-8)
257-
/// It converts OsString to &[u8] for unix targets only
258-
/// On non-unix (i.e. Windows) it will just return an error if delimiter value is not UTF-8
256+
/// Converts an `OsStr` to a `&[u8]` (the raw bytes on unix, UTF-8 on other platforms).
257+
///
258+
/// This always succeeds on unix platforms,
259+
/// and fails on other platforms if the string can't be coerced to UTF-8.
259260
pub fn os_str_as_bytes(os_string: &OsStr) -> mods::error::UResult<&[u8]> {
260261
#[cfg(unix)]
261262
let bytes = os_string.as_bytes();
@@ -271,13 +272,28 @@ pub fn os_str_as_bytes(os_string: &OsStr) -> mods::error::UResult<&[u8]> {
271272
Ok(bytes)
272273
}
273274

274-
/// Helper function for converting a slice of bytes into an &OsStr
275-
/// or OsString in non-unix targets.
275+
/// Performs a potentially lossy conversion from `OsStr` to bytes (the raw bytes on unix, UTF-8 on other platforms).
276+
///
277+
/// This is always lossless on unix platforms,
278+
/// and wraps [`OsStr::to_string_lossy`] on non-unix platforms.
279+
pub fn os_str_as_bytes_lossy(os_string: &OsStr) -> Cow<[u8]> {
280+
#[cfg(unix)]
281+
let bytes = Cow::from(os_string.as_bytes());
282+
283+
#[cfg(not(unix))]
284+
let bytes = match os_string.to_string_lossy() {
285+
Cow::Borrowed(slice) => Cow::from(slice.as_bytes()),
286+
Cow::Owned(owned) => Cow::from(owned.into_bytes()),
287+
};
288+
289+
bytes
290+
}
291+
292+
/// Converts a `&[u8]` to an `&OsStr`,
293+
/// or parses it as UTF-8 into an [`OsString`] on non-unix platforms.
276294
///
277-
/// It converts `&[u8]` to `Cow<OsStr>` for unix targets only.
278-
/// On non-unix (i.e. Windows), the conversion goes through the String type
279-
/// and thus undergo UTF-8 validation, making it fail if the stream contains
280-
/// non-UTF-8 characters.
295+
/// This always succeeds on unix platforms,
296+
/// and fails on other platforms if the bytes can't be parsed as UTF-8.
281297
pub fn os_str_from_bytes(bytes: &[u8]) -> mods::error::UResult<Cow<'_, OsStr>> {
282298
#[cfg(unix)]
283299
let os_str = Cow::Borrowed(OsStr::from_bytes(bytes));
@@ -289,9 +305,10 @@ pub fn os_str_from_bytes(bytes: &[u8]) -> mods::error::UResult<Cow<'_, OsStr>> {
289305
Ok(os_str)
290306
}
291307

292-
/// Helper function for making an `OsString` from a byte field
293-
/// It converts `Vec<u8>` to `OsString` for unix targets only.
294-
/// On non-unix (i.e. Windows) it may fail if the bytes are not valid UTF-8
308+
/// Converts a `Vec<u8>` into an `OsString`, parsing as UTF-8 on non-unix platforms.
309+
///
310+
/// This always succeeds on unix platforms,
311+
/// and fails on other platforms if the bytes can't be parsed as UTF-8.
295312
pub fn os_string_from_vec(vec: Vec<u8>) -> mods::error::UResult<OsString> {
296313
#[cfg(unix)]
297314
let s = OsString::from_vec(vec);

0 commit comments

Comments
 (0)