Skip to content

Commit 427cdf3

Browse files
committed
feat: add path operators for Uri<String>
Some parts are copied from the `url` crate.
1 parent 7a3e3c0 commit 427cdf3

File tree

3 files changed

+297
-2
lines changed

3 files changed

+297
-2
lines changed

Cargo.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ name = "fluent-uri"
33
version = "0.3.2"
44
authors = ["Scallop Ye <[email protected]>"]
55
edition = "2021"
6-
rust-version = "1.65"
6+
rust-version = "1.80"
77
description = "A generic URI/IRI handling library compliant with RFC 3986/3987."
88
documentation = "https://docs.rs/fluent-uri"
99
repository = "https://github.com/yescallop/fluent-uri-rs"
@@ -18,7 +18,9 @@ net = []
1818

1919
[dependencies]
2020
borrow-or-share = "0.2"
21+
percent-encoding = "2.3.1"
2122
ref-cast = "1.0"
23+
thiserror = "2.0.12"
2224

2325
[dependencies.serde]
2426
version = "1.0"

src/ri.rs

Lines changed: 280 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use crate::{
55
},
66
component::{Authority, IAuthority, Scheme},
77
encoding::{encode_byte, encoder::*, EStr, Encoder},
8-
error::{ParseError, ResolveError},
8+
error::{BuildError, ParseError, ResolveError},
99
internal::{Criteria, HostMeta, Meta, Parse, RiRef, Value},
1010
normalizer, parser, resolver,
1111
};
@@ -963,6 +963,285 @@ ri_maybe_ref! {
963963
FragmentEncoderType = Fragment,
964964
}
965965

966+
/// Errors produced when converting between a `Uri` and a file system path.
#[derive(Debug, thiserror::Error)]
pub enum UriError {
    /// The URI path could not be split into absolute segments
    /// (`segments_if_absolute()` returned `None`).
    #[error("No Segments")]
    NoSegments,
    /// The URI could not be mapped onto a local path by the segment-to-path
    /// helpers, e.g. because it names a non-empty host on a Unix-like platform.
    #[error("HostError")]
    HostError,
    /// Assembling the URI from its components failed; wraps the builder's error.
    #[error("build error")]
    BuildError(#[from] BuildError),
    /// The file system path handed to `from_file_path` is not absolute.
    #[error("not absolute path")]
    NotAbsolutePath,
    /// The path is not acceptable, e.g. its percent-encoded form was rejected
    /// by `EStr::new`.
    #[error("Path illegal")]
    IllegalPath,
}
980+
/// The `file` scheme, used as the scheme of URIs built from file system paths.
const SCHEME_FILE: &Scheme = Scheme::new_or_panic("file");
981+
impl<'i, 'o, T: BorrowOrShare<'i, 'o, str>> Uri<T> {
    /// Assuming the URI is in the `file` scheme or similar,
    /// converts its path to an absolute `std::path::PathBuf`.
    ///
    /// **Note:** The scheme is only consulted when the URI names a host other
    /// than the empty host or `localhost`; for host-less URIs of other schemes
    /// the result may be nonsensical. It is the caller's responsibility to
    /// check the scheme before calling this.
    ///
    /// ```
    /// # use fluent_uri::Uri;
    /// # let url = Uri::parse("file:///etc/passwd").unwrap();
    /// let path = url.to_file_path();
    /// ```
    ///
    /// # Errors
    ///
    /// - `UriError::NoSegments` if the path is not absolute
    ///   (`segments_if_absolute()` returns `None`).
    /// - `UriError::HostError` if the host is neither empty nor `localhost`
    ///   and the scheme is not `file`, or if the platform helper cannot
    ///   represent the host (on Unix-like targets any such host is rejected;
    ///   on Windows it becomes the server of a UNC path).
    /// - Any other error reported by the platform helper while decoding the
    ///   path segments.
    ///
    /// This method is only available if the `std` Cargo feature is enabled.
    #[cfg(feature = "std")]
    pub fn to_file_path(&'i self) -> Result<std::path::PathBuf, UriError> {
        let path = self.path();
        let segments = path
            .segments_if_absolute()
            .ok_or(UriError::NoSegments)?;

        let host: Option<&str> = match self.authority().map(|authority| authority.host()) {
            None => None,
            // `file:///x` has an authority with an empty host; that and
            // `localhost` (hosts are case-insensitive) both mean "this machine".
            Some(local) if local.is_empty() || local.eq_ignore_ascii_case("localhost") => None,
            // Schemes are case-insensitive, so `FILE://server/share` counts too.
            Some(remote) if self.scheme().as_str().eq_ignore_ascii_case("file") => Some(remote),
            Some(_) => return Err(UriError::HostError),
        };

        file_url_segments_to_pathbuf(host, segments)
    }
}
1017+
1018+
mod control_chars {
1019+
use percent_encoding::AsciiSet;
1020+
/// https://url.spec.whatwg.org/#fragment-percent-encode-set
1021+
const FRAGMENT: &AsciiSet = &percent_encoding::CONTROLS
1022+
.add(b' ')
1023+
.add(b'"')
1024+
.add(b'<')
1025+
.add(b'>')
1026+
.add(b'`');
1027+
1028+
/// https://url.spec.whatwg.org/#path-percent-encode-set
1029+
const PATH: &AsciiSet = &FRAGMENT.add(b'#').add(b'?').add(b'{').add(b'}');
1030+
pub(crate) const PATH_SEGMENT: &AsciiSet = &PATH.add(b'/').add(b'%');
1031+
1032+
pub(crate) const SPECIAL_PATH_SEGMENT: &AsciiSet = &PATH_SEGMENT.add(b'\\');
1033+
}
1034+
1035+
impl Uri<String> {
1036+
/// Convert a file name as `std::path::Path` into an URL in the `file` scheme.
1037+
///
1038+
/// This returns `Err` if the given path is not absolute or,
1039+
/// on Windows, if the prefix is not a disk prefix (e.g. `C:`) or a UNC prefix (`\\`).
1040+
///
1041+
/// # Examples
1042+
///
1043+
/// On Unix-like platforms:
1044+
///
1045+
/// ```
1046+
/// # if cfg!(unix) {
1047+
/// # use fluent_uri::Uri;
1048+
///
1049+
/// let uri = Uri::from_file_path("/tmp/foo.txt").unwrap();
1050+
/// assert_eq!(uri.as_str(), "file:///tmp/foo.txt");
1051+
///
1052+
/// let uri = Uri::from_file_path("../foo.txt");
1053+
/// assert!(uri.is_err());
1054+
///
1055+
/// let uri = Uri::from_file_path("https://google.com/");
1056+
/// assert!(uri.is_err());
1057+
/// # }
1058+
/// ```
1059+
///
1060+
/// # Errors
1061+
///
1062+
/// Will return error when the path is illegal
1063+
pub fn from_file_path<P: AsRef<std::path::Path>>(path: P) -> Result<Self, UriError> {
1064+
use control_chars::*;
1065+
use percent_encoding::percent_encode;
1066+
use std::os::unix::ffi::OsStrExt;
1067+
let path = path.as_ref();
1068+
if !path.is_absolute() {
1069+
return Err(UriError::NotAbsolutePath);
1070+
}
1071+
let mut serialization = "".to_owned();
1072+
let mut empty = true;
1073+
for component in path.components().skip(1) {
1074+
empty = false;
1075+
serialization.push('/');
1076+
#[cfg(not(target_os = "wasi"))]
1077+
serialization.extend(percent_encode(
1078+
component.as_os_str().as_bytes(),
1079+
SPECIAL_PATH_SEGMENT,
1080+
));
1081+
1082+
#[cfg(target_os = "wasi")]
1083+
serialization.extend(percent_encode(
1084+
component.as_os_str().to_string_lossy().as_bytes(),
1085+
SPECIAL_PATH_SEGMENT,
1086+
));
1087+
}
1088+
if empty {
1089+
serialization.push('/');
1090+
}
1091+
let path = EStr::new(&serialization).ok_or(UriError::IllegalPath)?;
1092+
Ok(Self::builder()
1093+
.scheme(SCHEME_FILE)
1094+
.authority(Authority::EMPTY)
1095+
.path(path)
1096+
.build()?)
1097+
}
1098+
}
1099+
1100+
use crate::encoding::Split;
1101+
/// Builds an absolute path from percent-encoded URI path segments on
/// Unix-like targets.
///
/// Each segment is percent-decoded and joined with `/`. On Redox the result
/// is prefixed with `file:`.
///
/// # Errors
///
/// - `UriError::HostError` if `host` is present and non-empty, since these
///   targets have no notion of a remote file host.
/// - `UriError::IllegalPath` on WASI if the decoded bytes are not valid UTF-8.
#[cfg(all(
    feature = "std",
    any(unix, target_os = "redox", target_os = "wasi", target_os = "hermit")
))]
fn file_url_segments_to_pathbuf(
    host: Option<&str>,
    segments: Split<'_, Path>,
) -> Result<std::path::PathBuf, UriError> {
    use alloc::vec::Vec;
    use percent_encoding::percent_decode;
    #[cfg(not(target_os = "wasi"))]
    use std::ffi::OsStr;
    #[cfg(target_os = "hermit")]
    use std::os::hermit::ffi::OsStrExt;
    #[cfg(any(unix, target_os = "redox"))]
    use std::os::unix::prelude::OsStrExt;
    use std::path::PathBuf;

    if host.is_some_and(|host| !host.is_empty()) {
        return Err(UriError::HostError);
    }

    let mut bytes = if cfg!(target_os = "redox") {
        b"file:".to_vec()
    } else {
        Vec::new()
    };

    for segment in segments {
        bytes.push(b'/');
        bytes.extend(percent_decode(segment.as_str().as_bytes()));
    }

    // A windows drive letter must end with a slash.
    if bytes.len() > 2
        && bytes[bytes.len() - 2].is_ascii_alphabetic()
        && matches!(bytes[bytes.len() - 1], b':' | b'|')
    {
        bytes.push(b'/');
    }

    #[cfg(not(target_os = "wasi"))]
    let path = PathBuf::from(OsStr::from_bytes(&bytes));

    // WASI exposes no stable raw-byte `OsStr` constructor, so the decoded
    // bytes must be valid UTF-8 there.
    #[cfg(target_os = "wasi")]
    let path = String::from_utf8(bytes)
        .map(PathBuf::from)
        .map_err(|_| UriError::IllegalPath)?;

    debug_assert!(
        path.is_absolute(),
        "to_file_path() failed to produce an absolute Path"
    );

    Ok(path)
}
1165+
1166+
/// Windows variant of the segment-to-path conversion; forwards `host` and
/// `segments` unchanged to `file_url_segments_to_pathbuf_windows`.
#[cfg(all(feature = "std", windows))]
fn file_url_segments_to_pathbuf(
    host: Option<&str>,
    segments: Split<'_, Path>,
) -> Result<std::path::PathBuf, UriError> {
    file_url_segments_to_pathbuf_windows(host, segments)
}
1173+
1174+
/// Reports whether `ch` is an ASCII letter (`A`–`Z` or `a`–`z`).
///
/// See <https://url.spec.whatwg.org/#ascii-alpha>.
#[allow(unused)]
#[inline]
fn ascii_alpha(ch: char) -> bool {
    matches!(ch, 'A'..='Z' | 'a'..='z')
}
1180+
1181+
/// Builds a Windows path from percent-encoded URI path segments.
///
/// With a non-empty `host` the result is a UNC path (`\\host\seg\...`).
/// Otherwise the first segment must be a drive letter, written either as
/// `C:` or with the colon percent-encoded (`C%3A`), and the result is
/// `C:\seg\...`. All remaining segments are percent-decoded and must be
/// valid UTF-8.
///
/// # Errors
///
/// Returns `UriError::IllegalPath` if there is no host and the first segment
/// is missing or not a drive letter, or if a decoded segment is not UTF-8.
// Build this unconditionally to alleviate https://github.com/servo/rust-url/issues/102
#[cfg(feature = "std")]
#[cfg_attr(not(windows), allow(dead_code))]
fn file_url_segments_to_pathbuf_windows(
    host: Option<&str>,
    mut segments: Split<'_, Path>,
) -> Result<std::path::PathBuf, UriError> {
    use percent_encoding::percent_decode;
    use std::path::PathBuf;

    let mut string = match host {
        // An empty host means "local machine"; only a real host name may
        // start a UNC path, otherwise `\\` would swallow the drive segment.
        Some(server) if !server.is_empty() => r"\\".to_owned() + server,
        _ => {
            let first = segments.next().ok_or(UriError::IllegalPath)?.as_str();
            match first.as_bytes() {
                // `C:`
                [letter, b':'] if ascii_alpha(char::from(*letter)) => first.to_owned(),
                // `C%3A` / `C%3a`: the colon arrived percent-encoded.
                [letter, b'%', b'3', b'a' | b'A'] if ascii_alpha(char::from(*letter)) => {
                    let mut drive = String::with_capacity(2);
                    drive.push(char::from(*letter));
                    drive.push(':');
                    drive
                }
                _ => return Err(UriError::IllegalPath),
            }
        }
    };

    for segment in segments {
        string.push('\\');

        // Currently non-unicode windows paths cannot be represented
        let decoded = percent_decode(segment.as_str().as_bytes())
            .decode_utf8()
            .map_err(|_| UriError::IllegalPath)?;
        string.push_str(&decoded);
    }

    let path = PathBuf::from(string);

    debug_assert!(
        path.is_absolute(),
        "to_file_path() failed to produce an absolute Path"
    );

    Ok(path)
}
1244+
9661245
ri_maybe_ref! {
9671246
Type = UriRef,
9681247
type_name = "UriRef",

tests/to_path_buf.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
use fluent_uri::Uri;
2+
use std::path::Path;
3+
/// Checks that a canonical path survives a path -> URI -> path round trip and
/// that a simple absolute path serializes to the expected `file` URI.
#[test]
fn test_path_roundtrip_conversion() {
    // Round trip: the current directory, canonicalized so that it is absolute.
    let original = std::fs::canonicalize(Path::new(".")).unwrap();
    let conv: Uri<String> = Uri::from_file_path(&original).unwrap();
    let restored = conv.to_file_path().unwrap();
    assert_eq!(original, restored, "conv={conv:?}",);

    // Serialization of a fixed path.
    let uri = Uri::from_file_path("/tmp/foo.txt").unwrap();
    assert_eq!(uri.as_str(), "file:///tmp/foo.txt");
}

0 commit comments

Comments
 (0)