Skip to content

Commit 371c57a

Browse files
committed
Don't do default port normalization by default
1 parent f17236a commit 371c57a

File tree

5 files changed

+34
-125
lines changed

5 files changed

+34
-125
lines changed

src/build/mod.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -348,11 +348,10 @@ impl<R, S: To<PortEnd>> Builder<R, S> {
348348
///
349349
/// This method takes either a `u16` or <code>&amp;[EStr]&lt;[Port]&gt;</code> as an argument.
350350
///
351-
/// For consistency, you should not produce an empty or [default] port.
351+
/// For consistency, you should not produce an empty or default port.
352352
///
353353
/// [port-spec]: https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.3
354354
/// [Port]: crate::pct_enc::encoder::Port
355-
/// [default]: Scheme::default_port
356355
pub fn port(mut self, port: impl AsPort) -> Builder<R, PortEnd> {
357356
port.push_to(&mut self.inner.buf);
358357
self.cast()

src/component.rs

Lines changed: 1 addition & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -113,97 +113,6 @@ impl Scheme {
113113
}
114114
}
115115

116-
macro_rules! default_port {
117-
($($name:literal, $bname:literal => $port:literal, rfc($rfc:literal))*) => {
118-
impl Scheme {
119-
/// Returns the optional default port of the scheme if it is
120-
/// registered [at IANA][iana] with a permanent status.
121-
///
122-
/// [iana]: https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml
123-
///
124-
/// The following table lists all schemes concerned, their default ports, and references:
125-
///
126-
/// | Scheme | Port | Reference |
127-
/// | - | - | - |
128-
$(#[doc = concat!("| ", $name, " | ", $port, " | [RFC ", $rfc, "](https://datatracker.ietf.org/doc/html/rfc", $rfc, ") |")])*
129-
#[must_use]
130-
pub fn default_port(&self) -> Option<u16> {
131-
const MAX_LEN: usize = {
132-
let mut res = 0;
133-
$(
134-
if $name.len() > res {
135-
res = $name.len();
136-
}
137-
)*
138-
res
139-
};
140-
141-
let len = self.inner.len();
142-
if len > MAX_LEN {
143-
return None;
144-
}
145-
146-
let mut buf = [0; MAX_LEN];
147-
for (i, b) in self.inner.bytes().enumerate() {
148-
buf[i] = b | ASCII_CASE_MASK;
149-
}
150-
151-
match &buf[..len] {
152-
$($bname => Some($port),)*
153-
_ => None,
154-
}
155-
}
156-
}
157-
};
158-
}
159-
160-
default_port! {
161-
"aaa", b"aaa" => 3868, rfc(6733)
162-
"aaas", b"aaas" => 5658, rfc(6733)
163-
"acap", b"acap" => 674, rfc(2244)
164-
"cap", b"cap" => 1026, rfc(4324)
165-
"coap", b"coap" => 5683, rfc(7252)
166-
"coap+tcp", b"coap+tcp" => 5683, rfc(8323)
167-
"coap+ws", b"coap+ws" => 80, rfc(8323)
168-
"coaps", b"coaps" => 5684, rfc(7252)
169-
"coaps+tcp", b"coaps+tcp" => 5684, rfc(8323)
170-
"coaps+ws", b"coaps+ws" => 443, rfc(8323)
171-
"dict", b"dict" => 2628, rfc(2229)
172-
"dns", b"dns" => 53, rfc(4501)
173-
"ftp", b"ftp" => 21, rfc(1738)
174-
"go", b"go" => 1096, rfc(3368)
175-
"gopher", b"gopher" => 70, rfc(4266)
176-
"http", b"http" => 80, rfc(9110)
177-
"https", b"https" => 443, rfc(9110)
178-
"icap", b"icap" => 1344, rfc(3507)
179-
"imap", b"imap" => 143, rfc(5092)
180-
"ipp", b"ipp" => 631, rfc(3510)
181-
"ipps", b"ipps" => 631, rfc(7472)
182-
"ldap", b"ldap" => 389, rfc(4516)
183-
"mtqp", b"mtqp" => 1038, rfc(3887)
184-
"mupdate", b"mupdate" => 3905, rfc(3656)
185-
"nfs", b"nfs" => 2049, rfc(2224)
186-
"nntp", b"nntp" => 119, rfc(5538)
187-
"pop", b"pop" => 110, rfc(2384)
188-
"rtsp", b"rtsp" => 554, rfc(7826)
189-
"rtsps", b"rtsps" => 322, rfc(7826)
190-
"rtspu", b"rtspu" => 554, rfc(2326)
191-
"snmp", b"snmp" => 161, rfc(4088)
192-
"stun", b"stun" => 3478, rfc(7064)
193-
"stuns", b"stuns" => 5349, rfc(7064)
194-
"telnet", b"telnet" => 23, rfc(4248)
195-
"tip", b"tip" => 3372, rfc(2371)
196-
"tn3270", b"tn3270" => 23, rfc(6270)
197-
"turn", b"turn" => 3478, rfc(7065)
198-
"turns", b"turns" => 5349, rfc(7065)
199-
"vemmi", b"vemmi" => 575, rfc(2122)
200-
"vnc", b"vnc" => 5900, rfc(7869)
201-
"ws", b"ws" => 80, rfc(6455)
202-
"wss", b"wss" => 443, rfc(6455)
203-
"z39.50r", b"z39.50r" => 210, rfc(2056)
204-
"z39.50s", b"z39.50s" => 210, rfc(2056)
205-
}
206-
207116
impl PartialEq for Scheme {
208117
#[inline]
209118
fn eq(&self, other: &Self) -> bool {
@@ -461,7 +370,7 @@ impl<'a, UserinfoE: Encoder, RegNameE: Encoder> Authority<'a, UserinfoE, RegName
461370

462371
/// Returns the optional [port] subcomponent.
463372
///
464-
/// A scheme may define a [default port] to use when the port is
373+
/// A scheme may define a default port to use when the port is
465374
/// not present or is empty.
466375
///
467376
/// Note that the port may be empty, have leading zeros, or be larger than [`u16::MAX`].
@@ -470,7 +379,6 @@ impl<'a, UserinfoE: Encoder, RegNameE: Encoder> Authority<'a, UserinfoE, RegName
470379
/// mechanism that allows ports larger than [`u16::MAX`].
471380
///
472381
/// [port]: https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.3
473-
/// [default port]: Scheme::default_port
474382
///
475383
/// # Examples
476384
///

src/imp.rs

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -569,7 +569,7 @@ macro_rules! ri_maybe_ref {
569569
/// `self.resolve_against(base).map(|r| r.normalize()).ok()` equals
570570
/// `self.normalize().resolve_against(&base.normalize()).ok()`.
571571
///
572-
/// If you need to resolve multiple references against a common base or configure the behavior
572+
/// If you need to resolve multiple references against a common base or customize the behavior
573573
/// of resolution, consider using [`Resolver`](crate::resolve::Resolver) instead.
574574
///
575575
/// # Errors
@@ -607,15 +607,15 @@ macro_rules! ri_maybe_ref {
607607
/// This method applies syntax-based normalization described in
608608
/// [Section 6.2.2 of RFC 3986](https://datatracker.ietf.org/doc/html/rfc3986#section-6.2.2)
609609
/// and [Section 5.3.2 of RFC 3987](https://datatracker.ietf.org/doc/html/rfc3987#section-5.3.2),
610-
/// along with IPv6 address and default port normalization.
610+
/// along with IPv6 address normalization.
611611
/// This is effectively equivalent to taking the following steps in order:
612612
///
613613
/// - Decode any percent-encoded octets that correspond to an allowed character which is not reserved.
614614
/// - Uppercase the hexadecimal digits within all percent-encoded octets.
615615
/// - Lowercase all ASCII characters within the scheme and the host except the percent-encoded octets.
616616
/// - Turn any IPv6 literal address into its canonical form as per
617617
/// [RFC 5952](https://datatracker.ietf.org/doc/html/rfc5952).
618-
/// - If the port is empty or equals the [scheme's default], remove it along with the `':'` delimiter.
618+
/// - If the port is empty, remove the preceding `':'` delimiter.
619619
/// - If `self` has a scheme and an [absolute] path, apply the
620620
/// [`remove_dot_segments`] algorithm to the path, taking account of
621621
/// percent-encoded dot segments as described at [`UriRef::resolve_against`].
@@ -624,10 +624,9 @@ macro_rules! ri_maybe_ref {
624624
///
625625
/// This method is idempotent: `self.normalize()` equals `self.normalize().normalize()`.
626626
///
627-
/// If you need to configure the behavior of normalization, consider using [`Normalizer`] instead.
627+
/// If you need to customize the behavior of normalization, consider using [`Normalizer`] instead.
628628
///
629629
/// [`UriRef::resolve_against`]: crate::UriRef::resolve_against
630-
/// [scheme's default]: Scheme::default_port
631630
/// [absolute]: EStr::<Path>::is_absolute
632631
/// [`remove_dot_segments`]: https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
633632
///

src/normalize.rs

Lines changed: 27 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
//! Module for normalization.
22
33
use crate::{
4-
component::Scheme,
54
imp::{HostMeta, Meta, RiMaybeRef, RmrRef},
65
parse,
76
pct_enc::{
@@ -45,15 +44,15 @@ impl crate::Error for NormalizeError {}
4544
#[must_use]
4645
pub struct Normalizer {
4746
allow_path_underflow: bool,
48-
default_port_f: fn(&Scheme) -> Option<u16>,
47+
default_port_f: fn(&str) -> Option<u16>,
4948
}
5049

5150
impl Normalizer {
5251
/// Creates a new `Normalizer` with default configuration.
53-
pub fn new() -> Self {
52+
pub const fn new() -> Self {
5453
Self {
5554
allow_path_underflow: true,
56-
default_port_f: Scheme::default_port,
55+
default_port_f: |_| None,
5756
}
5857
}
5958

@@ -68,41 +67,45 @@ impl Normalizer {
6867
/// ```
6968
/// use fluent_uri::{normalize::{Normalizer, NormalizeError}, Uri};
7069
///
71-
/// let normalizer = Normalizer::new().allow_path_underflow(false);
72-
/// let uri = Uri::parse("http://example.com/..")?;
70+
/// const NORMALIZER: Normalizer = Normalizer::new().allow_path_underflow(false);
7371
///
74-
/// assert_eq!(normalizer.normalize(&uri).unwrap_err(), NormalizeError::PathUnderflow);
72+
/// let uri = Uri::parse("http://example.com/..")?;
73+
/// assert_eq!(NORMALIZER.normalize(&uri).unwrap_err(), NormalizeError::PathUnderflow);
7574
/// # Ok::<_, fluent_uri::ParseError>(())
7675
/// ```
77-
pub fn allow_path_underflow(mut self, value: bool) -> Self {
76+
pub const fn allow_path_underflow(mut self, value: bool) -> Self {
7877
self.allow_path_underflow = value;
7978
self
8079
}
8180

82-
/// Sets the function with which to get the default port of a scheme.
81+
/// Sets the function with which to get the default port for a scheme.
82+
///
83+
/// This defaults to `|_| None`.
8384
///
84-
/// This defaults to [`Scheme::default_port`].
85+
/// The scheme will be lowercased before being passed to the function.
8586
///
8687
/// # Examples
8788
///
8889
/// ```
89-
/// use fluent_uri::{component::Scheme, normalize::Normalizer, Uri};
90+
/// use fluent_uri::{normalize::Normalizer, Uri};
9091
///
91-
/// const SCHEME_FOO: &Scheme = Scheme::new_or_panic("foo");
92-
///
93-
/// let normalizer = Normalizer::new().default_port_with(|scheme| {
94-
/// if scheme == SCHEME_FOO {
95-
/// Some(4673)
96-
/// } else {
97-
/// scheme.default_port()
92+
/// const NORMALIZER: Normalizer = Normalizer::new().default_port_with(|scheme| {
93+
/// // Simply match on `scheme` as it's already in lowercase.
94+
/// match scheme {
95+
/// "http" | "ws" => Some(80),
96+
/// "https" | "wss" => Some(443),
97+
/// _ => None,
9898
/// }
9999
/// });
100-
/// let uri = Uri::parse("foo://localhost:4673")?;
101100
///
102-
/// assert_eq!(normalizer.normalize(&uri).unwrap(), "foo://localhost");
101+
/// let uri = Uri::parse("http://example.com:80/")?;
102+
/// assert_eq!(NORMALIZER.normalize(&uri).unwrap(), "http://example.com/");
103+
///
104+
/// let uri = Uri::parse("foo://example.com:4673/")?;
105+
/// assert_eq!(NORMALIZER.normalize(&uri).unwrap(), "foo://example.com:4673/");
103106
/// # Ok::<_, fluent_uri::ParseError>(())
104107
/// ```
105-
pub fn default_port_with(mut self, f: fn(&Scheme) -> Option<u16>) -> Self {
108+
pub const fn default_port_with(mut self, f: fn(&str) -> Option<u16>) -> Self {
106109
self.default_port_f = f;
107110
self
108111
}
@@ -141,7 +144,7 @@ pub(crate) fn normalize(
141144
r: RmrRef<'_, '_>,
142145
ascii_only: bool,
143146
allow_path_underflow: bool,
144-
default_port_f: fn(&Scheme) -> Option<u16>,
147+
default_port_f: fn(&str) -> Option<u16>,
145148
) -> Result<(String, Meta), NormalizeError> {
146149
// For "a://[::ffff:5:9]/" the capacity is not enough,
147150
// but it's fine since this rarely happens.
@@ -206,8 +209,8 @@ pub(crate) fn normalize(
206209
if let Some(port) = auth.port() {
207210
if !port.is_empty() {
208211
let mut eq_default = false;
209-
if let Some(scheme) = r.scheme_opt() {
210-
if let Some(default) = default_port_f(scheme) {
212+
if let Some(scheme_end) = meta.scheme_end {
213+
if let Some(default) = default_port_f(&buf[..scheme_end.get()]) {
211214
eq_default = port.as_str().parse().ok() == Some(default);
212215
}
213216
}

tests/normalize.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ fn normalize() {
2727
assert_eq!(r.normalize(), "http://example.com/");
2828

2929
let r = UriRef::parse("http://example.com:80/").unwrap();
30-
assert_eq!(r.normalize(), "http://example.com/");
30+
assert_eq!(r.normalize(), "http://example.com:80/");
3131

3232
// Lowercase percent-encoded octet.
3333
let r = UriRef::parse("%3a").unwrap();

0 commit comments

Comments
 (0)