Skip to content

Commit e50b873

Browse files
committed
feat: add path operators for Uri<String>
Some parts are copied from the url crate.
1 parent 7a3e3c0 commit e50b873

File tree

3 files changed

+298
-2
lines changed

3 files changed

+298
-2
lines changed

Cargo.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ name = "fluent-uri"
33
version = "0.3.2"
44
authors = ["Scallop Ye <[email protected]>"]
55
edition = "2021"
6-
rust-version = "1.65"
6+
rust-version = "1.80"
77
description = "A generic URI/IRI handling library compliant with RFC 3986/3987."
88
documentation = "https://docs.rs/fluent-uri"
99
repository = "https://github.com/yescallop/fluent-uri-rs"
@@ -18,7 +18,9 @@ net = []
1818

1919
[dependencies]
2020
borrow-or-share = "0.2"
21+
percent-encoding = "2.3.1"
2122
ref-cast = "1.0"
23+
thiserror = "2.0.12"
2224

2325
[dependencies.serde]
2426
version = "1.0"

src/ri.rs

Lines changed: 281 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use crate::{
55
},
66
component::{Authority, IAuthority, Scheme},
77
encoding::{encode_byte, encoder::*, EStr, Encoder},
8-
error::{ParseError, ResolveError},
8+
error::{BuildError, ParseError, ResolveError},
99
internal::{Criteria, HostMeta, Meta, Parse, RiRef, Value},
1010
normalizer, parser, resolver,
1111
};
@@ -963,6 +963,286 @@ ri_maybe_ref! {
963963
FragmentEncoderType = Fragment,
964964
}
965965

966+
/// The error describe what happened during converting [std::path::Path] to [Uri] or converting
967+
/// [Uri] to [std::path::Path]
968+
#[derive(Debug, thiserror::Error)]
969+
pub enum UriPathError {
970+
#[error("No Segments")]
971+
NoSegments,
972+
#[error("HostError")]
973+
HostError,
974+
#[error("build error")]
975+
BuildError(#[from] BuildError),
976+
#[error("not absolute path")]
977+
NotAbsolutePath,
978+
#[error("Path illegal")]
979+
IllegalPath,
980+
}
981+
const SCHEME_FILE: &Scheme = Scheme::new_or_panic("file");
982+
impl<'i, 'o, T: BorrowOrShare<'i, 'o, str>> Uri<T> {
983+
/// Assuming the URL is in the `file` scheme or similar,
984+
/// convert its path to an absolute `std::path::Path`.
985+
///
986+
/// **Note:** This does not actually check the URL’s `scheme`,
987+
/// and may give nonsensical results for other schemes.
988+
/// It is the user’s responsibility to check the URL’s scheme before calling this.
989+
///
990+
/// ```
991+
/// # use fluent_uri::Uri;
992+
/// # let url = Uri::parse("file:///etc/passwd").unwrap();
993+
/// let path = url.to_file_path();
994+
/// ```
995+
///
996+
/// # Errors
997+
/// Returns `Err` if the host is neither empty nor `"localhost"` (except on Windows, where
998+
/// `file:` URLs may have a non-local host),
999+
/// or if `Path::new_opt()` returns `None`.
1000+
/// (That is, if the percent-decoded path contains a NUL byte or,
1001+
/// for a Windows path, is not UTF-8.)
1002+
///
1003+
/// This method is only available if the `std` Cargo feature is enabled.
1004+
///
1005+
pub fn to_file_path(&'i self) -> Result<std::path::PathBuf, UriPathError> {
1006+
let segments = self.path();
1007+
let segments = segments
1008+
.segments_if_absolute()
1009+
.ok_or(UriPathError::NoSegments)?;
1010+
let host: Option<&str> = match self.authority().map(|authority| authority.host()) {
1011+
None | Some("localhost") => None,
1012+
Some(host_data) if self.scheme().as_str() == "file" => Some(host_data),
1013+
Some(_) => return Err(UriPathError::NoSegments),
1014+
};
1015+
file_url_segments_to_pathbuf(host, segments)
1016+
}
1017+
}
1018+
1019+
mod control_chars {
1020+
use percent_encoding::AsciiSet;
1021+
/// https://url.spec.whatwg.org/#fragment-percent-encode-set
1022+
const FRAGMENT: &AsciiSet = &percent_encoding::CONTROLS
1023+
.add(b' ')
1024+
.add(b'"')
1025+
.add(b'<')
1026+
.add(b'>')
1027+
.add(b'`');
1028+
1029+
/// https://url.spec.whatwg.org/#path-percent-encode-set
1030+
const PATH: &AsciiSet = &FRAGMENT.add(b'#').add(b'?').add(b'{').add(b'}');
1031+
pub(crate) const PATH_SEGMENT: &AsciiSet = &PATH.add(b'/').add(b'%');
1032+
1033+
pub(crate) const SPECIAL_PATH_SEGMENT: &AsciiSet = &PATH_SEGMENT.add(b'\\');
1034+
}
1035+
1036+
impl Uri<String> {
1037+
/// Convert a file name as `std::path::Path` into an URL in the `file` scheme.
1038+
///
1039+
/// This returns `Err` if the given path is not absolute or,
1040+
/// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`).
1041+
///
1042+
/// # Examples
1043+
///
1044+
/// On Unix-like platforms:
1045+
///
1046+
/// ```
1047+
/// # if cfg!(unix) {
1048+
/// # use fluent_uri::Uri;
1049+
///
1050+
/// let uri = Uri::from_file_path("/tmp/foo.txt").unwrap();
1051+
/// assert_eq!(uri.as_str(), "file:///tmp/foo.txt");
1052+
///
1053+
/// let uri = Uri::from_file_path("../foo.txt");
1054+
/// assert!(uri.is_err());
1055+
///
1056+
/// let uri = Uri::from_file_path("https://google.com/");
1057+
/// assert!(uri.is_err());
1058+
/// # }
1059+
/// ```
1060+
///
1061+
/// # Errors
1062+
///
1063+
/// Will return error when the path is illegal
1064+
pub fn from_file_path<P: AsRef<std::path::Path>>(path: P) -> Result<Self, UriPathError> {
1065+
use control_chars::*;
1066+
use percent_encoding::percent_encode;
1067+
use std::os::unix::ffi::OsStrExt;
1068+
let path = path.as_ref();
1069+
if !path.is_absolute() {
1070+
return Err(UriPathError::NotAbsolutePath);
1071+
}
1072+
let mut serialization = "".to_owned();
1073+
let mut empty = true;
1074+
for component in path.components().skip(1) {
1075+
empty = false;
1076+
serialization.push('/');
1077+
#[cfg(not(target_os = "wasi"))]
1078+
serialization.extend(percent_encode(
1079+
component.as_os_str().as_bytes(),
1080+
SPECIAL_PATH_SEGMENT,
1081+
));
1082+
1083+
#[cfg(target_os = "wasi")]
1084+
serialization.extend(percent_encode(
1085+
component.as_os_str().to_string_lossy().as_bytes(),
1086+
SPECIAL_PATH_SEGMENT,
1087+
));
1088+
}
1089+
if empty {
1090+
serialization.push('/');
1091+
}
1092+
let path = EStr::new(&serialization).ok_or(UriPathError::IllegalPath)?;
1093+
Ok(Self::builder()
1094+
.scheme(SCHEME_FILE)
1095+
.authority(Authority::EMPTY)
1096+
.path(path)
1097+
.build()?)
1098+
}
1099+
}
1100+
1101+
use crate::encoding::Split;
1102+
/// Builds an absolute path from the path segments of a `file` URI on
/// Unix-like targets, Redox, WASI and Hermit.
///
/// Each segment is percent-decoded and appended after a `/`. On Redox the
/// result is prefixed with `file:`.
///
/// # Errors
///
/// - [`UriPathError::HostError`] if `host` is present and not empty.
/// - [`UriPathError::IllegalPath`] on WASI if the decoded bytes are not
///   valid UTF-8.
#[cfg(all(
    feature = "std",
    any(unix, target_os = "redox", target_os = "wasi", target_os = "hermit")
))]
fn file_url_segments_to_pathbuf(
    host: Option<&str>,
    segments: Split<'_, Path>,
) -> Result<std::path::PathBuf, UriPathError> {
    use alloc::vec::Vec;
    use percent_encoding::percent_decode;
    #[cfg(not(target_os = "wasi"))]
    use std::ffi::OsStr;
    #[cfg(target_os = "hermit")]
    use std::os::hermit::ffi::OsStrExt;
    #[cfg(any(unix, target_os = "redox"))]
    use std::os::unix::prelude::OsStrExt;
    use std::path::PathBuf;

    if host.is_some_and(|host| !host.is_empty()) {
        return Err(UriPathError::HostError);
    }

    let mut bytes = if cfg!(target_os = "redox") {
        b"file:".to_vec()
    } else {
        Vec::new()
    };

    for segment in segments {
        bytes.push(b'/');
        bytes.extend(percent_decode(segment.as_str().as_bytes()));
    }

    // A Windows drive letter must end with a slash: when the last two of at
    // least three bytes are a letter followed by `:` or `|`, append one.
    if matches!(
        bytes.as_slice(),
        [_, .., letter, b':' | b'|'] if letter.is_ascii_alphabetic()
    ) {
        bytes.push(b'/');
    }

    #[cfg(not(target_os = "wasi"))]
    let path = PathBuf::from(OsStr::from_bytes(&bytes));

    // WASI paths must be UTF-8; report a decoding failure as an illegal path
    // (a unit error cannot be converted into `UriPathError`).
    #[cfg(target_os = "wasi")]
    let path = String::from_utf8(bytes)
        .map(PathBuf::from)
        .map_err(|_| UriPathError::IllegalPath)?;

    debug_assert!(
        path.is_absolute(),
        "to_file_path() failed to produce an absolute Path"
    );

    Ok(path)
}
1166+
1167+
/// Windows flavour of the segment-to-path conversion.
///
/// Forwards `host` and `segments` unchanged to
/// `file_url_segments_to_pathbuf_windows` and returns its result.
#[cfg(all(feature = "std", windows))]
fn file_url_segments_to_pathbuf(
    host: Option<&str>,
    segments: Split<'_, Path>,
) -> Result<std::path::PathBuf, UriPathError> {
    file_url_segments_to_pathbuf_windows(host, segments)
}
1174+
1175+
/// Reports whether `ch` is an ASCII letter (`A`–`Z` or `a`–`z`).
///
/// See <https://url.spec.whatwg.org/#ascii-alpha>.
#[allow(unused)]
#[inline]
fn ascii_alpha(ch: char) -> bool {
    matches!(ch, 'A'..='Z' | 'a'..='z')
}
1181+
1182+
// Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102
/// Builds a Windows path from the path segments of a `file` URI.
///
/// With a non-empty `host` the result is a UNC path starting with
/// `\\<host>`. Otherwise the first segment must be a drive letter followed
/// by `:` (or its percent-encoded form `%3A`/`%3a`), which becomes the path
/// prefix. Every remaining segment is percent-decoded and appended after a
/// backslash.
///
/// # Errors
///
/// - [`UriPathError::NoSegments`] if there is no host and no first segment.
/// - [`UriPathError::IllegalPath`] if the first segment is not a drive
///   letter, or if a decoded segment is not valid UTF-8.
#[cfg(feature = "std")]
#[cfg_attr(not(windows), allow(dead_code))]
fn file_url_segments_to_pathbuf_windows(
    host: Option<&str>,
    mut segments: Split<'_, Path>,
) -> Result<std::path::PathBuf, UriPathError> {
    use percent_encoding::percent_decode;
    use std::path::PathBuf;

    // An empty host (as in `file:///C:/dir`) means "no host"; treating it as
    // a UNC server name would yield a bogus `\\` prefix.
    let mut string = if let Some(host) = host.filter(|host| !host.is_empty()) {
        r"\\".to_owned() + host
    } else {
        let first = segments.next().ok_or(UriPathError::NoSegments)?.as_str();

        match first.as_bytes() {
            // Plain drive letter, e.g. `C:`.
            [letter, b':'] if letter.is_ascii_alphabetic() => first.to_owned(),
            // Drive letter with a percent-encoded colon, e.g. `C%3A`.
            // The first byte is ASCII, so slicing at index 1 is on a char boundary.
            [letter, b'%', b'3', b'a' | b'A'] if letter.is_ascii_alphabetic() => {
                first[..1].to_owned() + ":"
            }
            _ => return Err(UriPathError::IllegalPath),
        }
    };

    for segment in segments {
        string.push('\\');

        // Currently non-unicode windows paths cannot be represented
        let decoded = String::from_utf8(percent_decode(segment.as_str().as_bytes()).collect())
            .map_err(|_| UriPathError::IllegalPath)?;
        string.push_str(&decoded);
    }

    let path = PathBuf::from(string);

    debug_assert!(
        path.is_absolute(),
        "to_file_path() failed to produce an absolute Path"
    );

    Ok(path)
}
1245+
9661246
ri_maybe_ref! {
9671247
Type = UriRef,
9681248
type_name = "UriRef",

tests/to_path_buf.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
use fluent_uri::Uri;
2+
use std::path::Path;
3+
/// Checks that a canonical directory path survives a path -> URI -> path
/// round trip, that a known path serializes to the expected `file` URI, and
/// that a relative path is rejected.
///
/// The fixtures are Unix paths, so the test is only built on Unix targets.
#[cfg(unix)]
#[test]
fn test_path_roundtrip_conversion() {
    let src = std::fs::canonicalize(Path::new(".")).unwrap();
    let conv: Uri<String> = Uri::from_file_path(&src).unwrap();

    let roundtrip = conv.to_file_path().unwrap();
    assert_eq!(src, roundtrip, "conv={conv:?}");

    let url = Uri::from_file_path("/tmp/foo.txt").unwrap();
    assert_eq!(url.as_str(), "file:///tmp/foo.txt");

    assert!(Uri::from_file_path("../foo.txt").is_err());
}

0 commit comments

Comments
 (0)