Skip to content

Commit dca8039

Browse files
committed
feat: add path operators for Uri<String>
some part copy from url lib
1 parent 7a3e3c0 commit dca8039

File tree

3 files changed

+314
-2
lines changed

3 files changed

+314
-2
lines changed

Cargo.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ name = "fluent-uri"
33
version = "0.3.2"
44
authors = ["Scallop Ye <[email protected]>"]
55
edition = "2021"
6-
rust-version = "1.65"
6+
rust-version = "1.80"
77
description = "A generic URI/IRI handling library compliant with RFC 3986/3987."
88
documentation = "https://docs.rs/fluent-uri"
99
repository = "https://github.com/yescallop/fluent-uri-rs"
@@ -18,7 +18,9 @@ net = []
1818

1919
[dependencies]
2020
borrow-or-share = "0.2"
21+
percent-encoding = "2.3.1"
2122
ref-cast = "1.0"
23+
thiserror = "2.0.12"
2224

2325
[dependencies.serde]
2426
version = "1.0"

src/ri.rs

Lines changed: 296 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use crate::{
55
},
66
component::{Authority, IAuthority, Scheme},
77
encoding::{encode_byte, encoder::*, EStr, Encoder},
8-
error::{ParseError, ResolveError},
8+
error::{BuildError, ParseError, ResolveError},
99
internal::{Criteria, HostMeta, Meta, Parse, RiRef, Value},
1010
normalizer, parser, resolver,
1111
};
@@ -963,6 +963,301 @@ ri_maybe_ref! {
963963
FragmentEncoderType = Fragment,
964964
}
965965

966+
/// The error describe what happened during converting [std::path::Path] to [Uri] or converting
967+
/// [Uri] to [std::path::Path]
968+
#[derive(Debug, thiserror::Error)]
969+
pub enum UriPathError {
970+
#[error("No Segments")]
971+
NoSegments,
972+
#[error("HostError")]
973+
HostError,
974+
#[error("build error")]
975+
BuildError(#[from] BuildError),
976+
#[error("not absolute path")]
977+
NotAbsolutePath,
978+
#[error("Path illegal")]
979+
IllegalPath,
980+
#[cfg(target_os = "windows")]
981+
#[error("Path component not know")]
982+
PathComponentNotKnow,
983+
}
984+
const SCHEME_FILE: &Scheme = Scheme::new_or_panic("file");
985+
impl<'i, 'o, T: BorrowOrShare<'i, 'o, str>> Uri<T> {
986+
/// Assuming the URL is in the `file` scheme or similar,
987+
/// convert its path to an absolute `std::path::Path`.
988+
///
989+
/// **Note:** This does not actually check the URL’s `scheme`,
990+
/// and may give nonsensical results for other schemes.
991+
/// It is the user’s responsibility to check the URL’s scheme before calling this.
992+
///
993+
/// ```
994+
/// # use fluent_uri::Uri;
995+
/// # let url = Uri::parse("file:///etc/passwd").unwrap();
996+
/// let path = url.to_file_path();
997+
/// ```
998+
///
999+
/// # Errors
1000+
/// Returns `Err` if the host is neither empty nor `"localhost"` (except on Windows, where
1001+
/// `file:` URLs may have a non-local host),
1002+
/// or if `Path::new_opt()` returns `None`.
1003+
/// (That is, if the percent-decoded path contains a NUL byte or,
1004+
/// for a Windows path, is not UTF-8.)
1005+
///
1006+
/// This method is only available if the `std` Cargo feature is enabled.
1007+
///
1008+
pub fn to_file_path(&'i self) -> Result<std::path::PathBuf, UriPathError> {
1009+
let segments = self.path();
1010+
let segments = segments
1011+
.segments_if_absolute()
1012+
.ok_or(UriPathError::NoSegments)?;
1013+
let host: Option<&str> = match self.authority().map(|authority| authority.host()) {
1014+
None | Some("localhost") => None,
1015+
Some(host_data) if self.scheme().as_str() == "file" => Some(host_data),
1016+
Some(_) => return Err(UriPathError::NoSegments),
1017+
};
1018+
file_url_segments_to_pathbuf(host, segments)
1019+
}
1020+
}
1021+
1022+
mod control_chars {
1023+
use percent_encoding::AsciiSet;
1024+
/// https://url.spec.whatwg.org/#fragment-percent-encode-set
1025+
const FRAGMENT: &AsciiSet = &percent_encoding::CONTROLS
1026+
.add(b' ')
1027+
.add(b'"')
1028+
.add(b'<')
1029+
.add(b'>')
1030+
.add(b'`');
1031+
1032+
/// https://url.spec.whatwg.org/#path-percent-encode-set
1033+
const PATH: &AsciiSet = &FRAGMENT.add(b'#').add(b'?').add(b'{').add(b'}');
1034+
pub(crate) const PATH_SEGMENT: &AsciiSet = &PATH.add(b'/').add(b'%');
1035+
1036+
pub(crate) const SPECIAL_PATH_SEGMENT: &AsciiSet = &PATH_SEGMENT.add(b'\\');
1037+
}
1038+
1039+
impl Uri<String> {
1040+
/// Convert a file name as `std::path::Path` into an URL in the `file` scheme.
1041+
///
1042+
/// This returns `Err` if the given path is not absolute or,
1043+
/// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`).
1044+
///
1045+
/// # Examples
1046+
///
1047+
/// On Unix-like platforms:
1048+
///
1049+
/// ```
1050+
/// # if cfg!(unix) {
1051+
/// # use fluent_uri::Uri;
1052+
///
1053+
/// let uri = Uri::from_file_path("/tmp/foo.txt").unwrap();
1054+
/// assert_eq!(uri.as_str(), "file:///tmp/foo.txt");
1055+
///
1056+
/// let uri = Uri::from_file_path("../foo.txt");
1057+
/// assert!(uri.is_err());
1058+
///
1059+
/// let uri = Uri::from_file_path("https://google.com/");
1060+
/// assert!(uri.is_err());
1061+
/// # }
1062+
/// ```
1063+
///
1064+
/// # Errors
1065+
///
1066+
/// Will return error when the path is illegal
1067+
pub fn from_file_path<P: AsRef<std::path::Path>>(path: P) -> Result<Self, UriPathError> {
1068+
use control_chars::*;
1069+
use percent_encoding::percent_encode;
1070+
#[cfg(target_os = "hermit")]
1071+
use std::os::hermit::ffi::osstrext;
1072+
#[cfg(any(unix, target_os = "redox"))]
1073+
use std::os::unix::ffi::OsStrExt;
1074+
let path = path.as_ref();
1075+
if !path.is_absolute() {
1076+
return Err(UriPathError::NotAbsolutePath);
1077+
}
1078+
let mut serialization = "".to_owned();
1079+
let mut empty = true;
1080+
for component in path.components().skip(1) {
1081+
empty = false;
1082+
serialization.push('/');
1083+
#[cfg(target_os = "windows")]
1084+
serialization.extend(percent_encode(
1085+
component
1086+
.as_os_str()
1087+
.to_str()
1088+
.ok_or(UriPathError::PathComponentNotKnow)?
1089+
.as_bytes(),
1090+
SPECIAL_PATH_SEGMENT,
1091+
));
1092+
#[cfg(all(not(target_os = "wasi"), not(target_os = "windows")))]
1093+
serialization.extend(percent_encode(
1094+
component.as_os_str().as_bytes(),
1095+
SPECIAL_PATH_SEGMENT,
1096+
));
1097+
1098+
#[cfg(target_os = "wasi")]
1099+
serialization.extend(percent_encode(
1100+
component.as_os_str().to_string_lossy().as_bytes(),
1101+
SPECIAL_PATH_SEGMENT,
1102+
));
1103+
}
1104+
if empty {
1105+
serialization.push('/');
1106+
}
1107+
let path = EStr::new(&serialization).ok_or(UriPathError::IllegalPath)?;
1108+
Ok(Self::builder()
1109+
.scheme(SCHEME_FILE)
1110+
.authority(Authority::EMPTY)
1111+
.path(path)
1112+
.build()?)
1113+
}
1114+
}
1115+
1116+
use crate::encoding::Split;
1117+
/// Joins percent-decoded URI path segments into an absolute path on
/// Unix-like targets (including redox, wasi and hermit).
///
/// # Errors
///
/// Returns [`UriPathError::HostError`] if `host` is present and non-empty, and
/// on wasi [`UriPathError::IllegalPath`] if the decoded bytes are not UTF-8.
#[cfg(all(
    feature = "std",
    any(unix, target_os = "redox", target_os = "wasi", target_os = "hermit")
))]
fn file_url_segments_to_pathbuf(
    host: Option<&str>,
    segments: Split<'_, Path>,
) -> Result<std::path::PathBuf, UriPathError> {
    use alloc::vec::Vec;
    use percent_encoding::percent_decode;
    #[cfg(not(target_os = "wasi"))]
    use std::ffi::OsStr;
    #[cfg(target_os = "hermit")]
    use std::os::hermit::ffi::OsStrExt;
    #[cfg(any(unix, target_os = "redox"))]
    use std::os::unix::prelude::OsStrExt;
    use std::path::PathBuf;

    if host.is_some_and(|host| !host.is_empty()) {
        return Err(UriPathError::HostError);
    }

    let mut bytes = if cfg!(target_os = "redox") {
        b"file:".to_vec()
    } else {
        Vec::new()
    };

    for segment in segments {
        bytes.push(b'/');
        bytes.extend(percent_decode(segment.as_str().as_bytes()));
    }

    // A windows drive letter must end with a slash.
    if bytes.len() > 2
        && bytes[bytes.len() - 2].is_ascii_alphabetic()
        && matches!(bytes[bytes.len() - 1], b':' | b'|')
    {
        bytes.push(b'/');
    }

    #[cfg(not(target_os = "wasi"))]
    let path = PathBuf::from(OsStr::from_bytes(&bytes));

    // On wasi the path is built from a `String`, so the bytes must be UTF-8.
    #[cfg(target_os = "wasi")]
    let path = String::from_utf8(bytes)
        .map(PathBuf::from)
        .map_err(|_| UriPathError::IllegalPath)?;

    debug_assert!(
        path.is_absolute(),
        "to_file_path() failed to produce an absolute Path"
    );

    Ok(path)
}
1181+
1182+
/// Windows flavour of the segment-to-path conversion; it simply forwards to
/// [`file_url_segments_to_pathbuf_windows`], which is compiled on every target.
#[cfg(all(feature = "std", windows))]
fn file_url_segments_to_pathbuf(
    host: Option<&str>,
    segments: Split<'_, Path>,
) -> Result<std::path::PathBuf, UriPathError> {
    file_url_segments_to_pathbuf_windows(host, segments)
}
1189+
1190+
/// Tells whether `ch` is an ASCII letter, i.e. an "ASCII alpha" as defined by
/// <https://url.spec.whatwg.org/#ascii-alpha>.
#[allow(unused)]
#[inline]
fn ascii_alpha(ch: char) -> bool {
    matches!(ch, 'A'..='Z' | 'a'..='z')
}
1196+
1197+
// Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102
/// Converts a host and URI path segments into a Windows path.
///
/// A non-empty `host` yields a UNC-style path (`\\host\...`). Otherwise the
/// first segment must be a drive letter followed by `:` (or its
/// percent-encoded form `%3A`), and the result starts with that drive.
///
/// # Errors
///
/// Returns [`UriPathError::IllegalPath`] if there is no valid drive segment
/// where one is required, or if a percent-decoded segment is not UTF-8.
#[cfg(feature = "std")]
#[cfg_attr(not(windows), allow(dead_code))]
fn file_url_segments_to_pathbuf_windows(
    host: Option<&str>,
    mut segments: Split<'_, Path>,
) -> Result<std::path::PathBuf, UriPathError> {
    use percent_encoding::percent_decode;
    use std::path::PathBuf;

    let mut string = match host {
        // An empty host means "local machine", not a UNC path without a server.
        Some(host) if !host.is_empty() => r"\\".to_owned() + host,
        _ => {
            let first = segments.next().ok_or(UriPathError::IllegalPath)?.as_str();

            match first.as_bytes() {
                [letter, b':'] if letter.is_ascii_alphabetic() => first.to_owned(),
                // The colon may arrive percent-encoded, as in `C%3A`.
                [letter, b'%', b'3', b'a' | b'A'] if letter.is_ascii_alphabetic() => {
                    first[..1].to_owned() + ":"
                }
                _ => return Err(UriPathError::IllegalPath),
            }
        }
    };

    for segment in segments.map(|seg| seg.as_str()) {
        string.push('\\');

        // Currently non-unicode windows paths cannot be represented
        let decoded = percent_decode(segment.as_bytes())
            .decode_utf8()
            .map_err(|_| UriPathError::IllegalPath)?;
        string.push_str(&decoded);
    }

    // A bare drive such as `C:` is relative to that drive's current directory;
    // the trailing separator makes it the drive root. Only a bare drive can be
    // two bytes long here, because the UNC form starts with `\\` plus a host.
    if string.len() == 2 {
        string.push('\\');
    }

    let path = PathBuf::from(string);

    debug_assert!(
        path.is_absolute(),
        "to_file_path() failed to produce an absolute Path"
    );

    Ok(path)
}
1260+
9661261
ri_maybe_ref! {
9671262
Type = UriRef,
9681263
type_name = "UriRef",

tests/to_path_buf.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
/// Checks that a canonical directory survives the path -> URI -> path round
/// trip, and that a simple absolute path serializes to the expected `file` URI.
#[cfg(unix)]
#[test]
fn test_path_roundtrip_conversion() {
    use fluent_uri::Uri;

    // Path -> URI -> path must give back the starting path.
    let src = std::fs::canonicalize(".").unwrap();
    let conv: Uri<String> = Uri::from_file_path(&src).unwrap();
    let roundtrip = conv.to_file_path().unwrap();
    assert_eq!(src, roundtrip, "conv={conv:?}");

    // A plain absolute path needs no percent-encoding at all.
    let url = Uri::from_file_path("/tmp/foo.txt").unwrap();
    assert_eq!(url.as_str(), "file:///tmp/foo.txt");
}

0 commit comments

Comments
 (0)