Skip to content

Commit 044b6e6

Browse files
Add custom URL parser for LSPS5.
Adds a new url_utils.rs module that provides:
- A lightweight URL parser specialized for LSPS5 webhook validation
- An implementation focusing on scheme and host extraction
- RFC-compliant scheme validation
- Tests for various URL scenarios

This implementation allows validating webhook URLs without depending on the external url crate.
1 parent 55410ef commit 044b6e6

File tree

1 file changed

+234
-0
lines changed

1 file changed

+234
-0
lines changed
Lines changed: 234 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,234 @@
1+
// This file is Copyright its original authors, visible in version control
2+
// history.
3+
//
4+
// This file is licensed under the Apache License, Version 2.0 <LICENSE-APACHE
5+
// or http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your option.
7+
// You may not use this file except in accordance with one or both of these
8+
// licenses.
9+
10+
//! URL utilities for LSPS5 webhook notifications.
11+
12+
use crate::alloc::string::ToString;
13+
use alloc::string::String;
14+
use lightning_types::string::UntrustedString;
15+
16+
use super::msgs::LSPS5ProtocolError;
17+
18+
/// A URL implementation for scheme and host extraction.
19+
/// Simplified representation of a URL with just scheme and host components.
20+
/// This struct provides parsing and access to these core parts of a URL string.
21+
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
22+
pub struct LSPSUrl {
23+
host: UntrustedString,
24+
/// The full URL string.
25+
url: UntrustedString,
26+
}
27+
28+
impl LSPSUrl {
29+
/// Parses a URL string into a URL instance.
30+
/// Extracts the scheme and host from any standard URL.
31+
///
32+
/// # Arguments
33+
/// * `url_str` - The URL string to parse
34+
///
35+
/// # Returns
36+
/// A Result containing either the parsed URL or an error message.
37+
pub fn parse(url_str: String) -> Result<Self, LSPS5ProtocolError> {
38+
if !url_str.is_ascii() {
39+
return Err(LSPS5ProtocolError::UrlParse);
40+
}
41+
42+
if url_str.chars().any(|c| c.is_control()) {
43+
return Err(LSPS5ProtocolError::UrlParse);
44+
}
45+
46+
let (scheme, remainder) =
47+
url_str.split_once("://").ok_or_else(|| (LSPS5ProtocolError::UrlParse))?;
48+
49+
if !scheme.eq_ignore_ascii_case("https") {
50+
return Err(LSPS5ProtocolError::UnsupportedProtocol);
51+
}
52+
53+
let host_section = remainder
54+
.split(['/', '?', '#'])
55+
.next()
56+
.ok_or_else(|| (LSPS5ProtocolError::UrlParse))?;
57+
58+
let host_without_auth = host_section
59+
.split('@')
60+
.next_back()
61+
.filter(|s| !s.is_empty())
62+
.ok_or_else(|| (LSPS5ProtocolError::UrlParse))?;
63+
64+
if host_without_auth.is_empty() || host_without_auth.contains(' ') {
65+
return Err(LSPS5ProtocolError::UrlParse);
66+
}
67+
68+
let host_str = match host_without_auth.rsplit_once(':') {
69+
Some((hostname, _port)) if hostname.is_empty() => {
70+
return Err(LSPS5ProtocolError::UrlParse)
71+
},
72+
Some((hostname, port)) => {
73+
if port.is_empty() {
74+
hostname.to_string()
75+
} else if port.parse::<u16>().is_err() {
76+
return Err(LSPS5ProtocolError::UrlParse);
77+
} else {
78+
host_without_auth.to_string()
79+
}
80+
},
81+
None => host_without_auth.to_string(),
82+
};
83+
84+
Ok(LSPSUrl {
85+
host: UntrustedString(host_str.to_string()),
86+
url: UntrustedString(url_str.to_string()),
87+
})
88+
}
89+
90+
/// Returns URL length.
91+
pub fn url_length(&self) -> usize {
92+
self.url.0.chars().count()
93+
}
94+
95+
/// Returns the full URL string.
96+
pub fn url(&self) -> &str {
97+
self.url.0.as_str()
98+
}
99+
}
100+
101+
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::vec::Vec;
    use proptest::prelude::*;

    /// A very long host must be accepted and extracted in full.
    #[test]
    fn test_extremely_long_url() {
        let n = 1000;
        let host = "a".repeat(n);
        let url_str = format!("https://{}/path", host);
        let result = LSPSUrl::parse(url_str);

        assert!(result.is_ok());
        let url = result.unwrap();
        assert_eq!(url.host.0.chars().count(), n);
    }

    /// Plain `http` is reported as an unsupported protocol rather than a parse error.
    #[test]
    fn test_parse_http_url() {
        let url_str = "http://example.com/path".to_string();
        let err = LSPSUrl::parse(url_str).unwrap_err();
        assert_eq!(err, LSPS5ProtocolError::UnsupportedProtocol);
    }

    /// Well-formed `https` URLs parse and yield the expected host.
    #[test]
    fn valid_lsps_url() {
        let test_vec: Vec<(&'static str, &'static str)> = vec![
            ("https://www.example.org/push?l=1234567890abcopqrstuv&c=best", "www.example.org"),
            ("https://www.example.com/path", "www.example.com"),
            ("https://example.org", "example.org"),
            ("https://example.com:8080/path", "example.com:8080"),
            ("https://api.example.com/v1/resources", "api.example.com"),
            ("https://example.com/page#section1", "example.com"),
            ("https://example.com/search?q=test#results", "example.com"),
            // Userinfo is stripped from the extracted host.
            ("https://user:password@example.com/", "example.com"),
            ("https://192.168.1.1/admin", "192.168.1.1"),
            // Spaces are only rejected inside the host, not in the path.
            ("https://example.com/path with spaces", "example.com"),
            // An empty port (dangling ':') is dropped from the host.
            ("https://example.com://path", "example.com"),
        ];
        for (url_str, expected_host) in test_vec {
            let url = LSPSUrl::parse(url_str.to_string());
            assert!(url.is_ok(), "Failed to parse URL: {}", url_str);
            assert_eq!(url.unwrap().host.0, expected_host);
        }
    }

    /// Non-`https` schemes, non-ASCII hosts and malformed separators are rejected.
    #[test]
    fn invalid_lsps_url() {
        let test_vec = vec![
            "ftp://ftp.example.org/pub/files/document.pdf",
            "sftp://user:password@sftp.example.com:22/uploads/",
            "ssh://username@host.com:2222",
            "lightning://03a.example.com/invoice?amount=10000",
            "ftp://user@ftp.example.com/files/",
            "https://例子.测试/path",
            "a123+-.://example.com",
            "https:\\\\example.com\\path",
            "https:///whatever",
        ];
        for url_str in test_vec {
            let url = LSPSUrl::parse(url_str.to_string());
            assert!(url.is_err(), "Expected error for URL: {}", url_str);
        }
    }

    /// Structurally broken inputs are rejected.
    #[test]
    fn parsing_errors() {
        let test_vec = vec![
            "example.com/path",
            "https://bad domain.com/",
            "https://example.com\0/path",
            "https://",
            "ht@ps://example.com",
            "http!://example.com",
            "1https://example.com",
            "https://://example.com",
            "https://example.com:port/path",
            "https://example.com:65536/path",
            "https://:8080/path",
            "https://user@/path",
            "https:",
            "://",
        ];
        for url_str in test_vec {
            let url = LSPSUrl::parse(url_str.to_string());
            assert!(url.is_err(), "Expected error for URL: {}", url_str);
        }
    }

    proptest! {
        /// For any URL matching the regex: if it parses, then
        /// - round-trip .url() == original,
        /// - url_length() == .chars().count()
        /// - host is non-empty and substring of the original,
        /// - port (if present) is numeric and fits in a `u16`,
        /// - IPv4 hosts match expected pattern.
        ///
        /// NOTE(review): the generated scheme is arbitrary (`[a-z][a-z0-9+.-]*`), so inputs that
        /// actually start with `https://` - and therefore reach the assertions below - should be
        /// rare. Consider a dedicated https-only generator.
        #[test]
        fn test_url_properties(
            url_str in proptest::string::string_regex(
                r"([a-z][a-z0-9+.-]*)://((?:[a-z0-9._~%!$&()*+,;=-]+@)?(?:localhost|\d{1,3}(?:\.\d{1,3}){3}|\[[a-fA-F0-9:.]+\]|[a-z0-9._~%+-]+(?:\.[a-z0-9._~%+-]+)*))(?::\d{1,5})?(/[a-z0-9._~%!$&()*+,;=:@/-]*)?(\?[a-z0-9._~%!$&()*+,;=:@/?-]*)?(\#[a-z0-9._~%!$&()*+,;=:@/?-]*)?"
            ).unwrap()
        ) {
            if let Ok(u) = LSPSUrl::parse(url_str.clone()) {
                prop_assert_eq!(u.url(), url_str.as_str());
                prop_assert_eq!(u.url_length(), url_str.chars().count());

                // Check URL starts with "https://" (since we only support HTTPS)
                prop_assert!(url_str.starts_with("https://"));

                prop_assert!(!u.host.0.is_empty());
                prop_assert!(url_str.contains(&u.host.0));

                if let Some(idx) = u.host.0.rfind(':') {
                    let (host_part, port_part) = u.host.0.split_at(idx);
                    if !host_part.is_empty() && port_part.len() > 1 {
                        let port_str = &port_part[1..];
                        prop_assert!(port_str.chars().all(|c| c.is_ascii_digit()));
                        // The parser only accepts ports that fit in a u16 (0..=65535).
                        prop_assert!(port_str.parse::<u16>().is_ok());
                    }
                }

                if u.host.0.chars().all(|c| c.is_ascii_digit() || c == '.') && u.host.0.matches('.').count() == 3 {
                    let octets: Vec<_> = u.host.0.split('.').collect();
                    prop_assert_eq!(octets.len(), 4);
                    for octet in octets {
                        prop_assert!(!octet.is_empty());
                    }
                }
            }
        }
    }
}

0 commit comments

Comments
 (0)