Skip to content

Commit aeea7dc

Browse files
committed
Another refactor
1 parent 373c154 commit aeea7dc

File tree

14 files changed

+2057
-2612
lines changed

14 files changed

+2057
-2612
lines changed

crates/common/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -51,6 +51,7 @@ config = { workspace = true }
5151
derive_more = { workspace = true }
5252
error-stack = { workspace = true }
5353
http = { workspace = true }
54+
log = { workspace = true }
5455
regex = { workspace = true }
5556
serde = { workspace = true }
5657
serde_json = { workspace = true }

crates/common/src/html_processor.rs

Lines changed: 46 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -65,7 +65,7 @@ impl StreamProcessor for HtmlWithPostProcessing {
6565
}
6666

6767
if changed {
68-
log::info!(
68+
log::debug!(
6969
"HTML post-processing complete: origin_host={}, output_len={}",
7070
self.origin_host,
7171
html.len()
@@ -138,6 +138,37 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
138138
fn protocol_relative_replacement(&self) -> String {
139139
format!("//{}", self.request_host)
140140
}
141+
142+
fn rewrite_url_value(&self, value: &str) -> Option<String> {
143+
if !value.contains(&self.origin_host) {
144+
return None;
145+
}
146+
147+
let https_origin = self.https_origin();
148+
let http_origin = self.http_origin();
149+
let protocol_relative_origin = self.protocol_relative_origin();
150+
let replacement_url = self.replacement_url();
151+
let protocol_relative_replacement = self.protocol_relative_replacement();
152+
153+
let mut rewritten = value
154+
.replace(&https_origin, &replacement_url)
155+
.replace(&http_origin, &replacement_url)
156+
.replace(&protocol_relative_origin, &protocol_relative_replacement);
157+
158+
if rewritten.starts_with(&self.origin_host) {
159+
let suffix = &rewritten[self.origin_host.len()..];
160+
let boundary_ok = suffix.is_empty()
161+
|| matches!(
162+
suffix.as_bytes().first(),
163+
Some(b'/') | Some(b'?') | Some(b'#')
164+
);
165+
if boundary_ok {
166+
rewritten = format!("{}{}", self.request_host, suffix);
167+
}
168+
}
169+
170+
(rewritten != value).then_some(rewritten)
171+
}
141172
}
142173

143174
let patterns = Rc::new(UrlPatterns {
@@ -170,11 +201,8 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
170201
move |el| {
171202
if let Some(mut href) = el.get_attribute("href") {
172203
let original_href = href.clone();
173-
let new_href = href
174-
.replace(&patterns.https_origin(), &patterns.replacement_url())
175-
.replace(&patterns.http_origin(), &patterns.replacement_url());
176-
if new_href != href {
177-
href = new_href;
204+
if let Some(rewritten) = patterns.rewrite_url_value(&href) {
205+
href = rewritten;
178206
}
179207

180208
match integrations.rewrite_attribute(
@@ -211,11 +239,8 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
211239
move |el| {
212240
if let Some(mut src) = el.get_attribute("src") {
213241
let original_src = src.clone();
214-
let new_src = src
215-
.replace(&patterns.https_origin(), &patterns.replacement_url())
216-
.replace(&patterns.http_origin(), &patterns.replacement_url());
217-
if new_src != src {
218-
src = new_src;
242+
if let Some(rewritten) = patterns.rewrite_url_value(&src) {
243+
src = rewritten;
219244
}
220245
match integrations.rewrite_attribute(
221246
"src",
@@ -251,11 +276,8 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
251276
move |el| {
252277
if let Some(mut action) = el.get_attribute("action") {
253278
let original_action = action.clone();
254-
let new_action = action
255-
.replace(&patterns.https_origin(), &patterns.replacement_url())
256-
.replace(&patterns.http_origin(), &patterns.replacement_url());
257-
if new_action != action {
258-
action = new_action;
279+
if let Some(rewritten) = patterns.rewrite_url_value(&action) {
280+
action = rewritten;
259281
}
260282

261283
match integrations.rewrite_attribute(
@@ -510,8 +532,12 @@ mod tests {
510532

511533
let html = r#"<html>
512534
<a href="https://origin.example.com/page">Link</a>
535+
<a href="//origin.example.com/proto">Proto</a>
536+
<a href="origin.example.com/bare">Bare</a>
513537
<img src="http://origin.example.com/image.jpg">
538+
<img src="//origin.example.com/image2.jpg">
514539
<form action="https://origin.example.com/submit">
540+
<form action="//origin.example.com/submit2">
515541
</html>"#;
516542

517543
let mut output = Vec::new();
@@ -521,8 +547,12 @@ mod tests {
521547

522548
let result = String::from_utf8(output).unwrap();
523549
assert!(result.contains(r#"href="https://test.example.com/page""#));
550+
assert!(result.contains(r#"href="//test.example.com/proto""#));
551+
assert!(result.contains(r#"href="test.example.com/bare""#));
524552
assert!(result.contains(r#"src="https://test.example.com/image.jpg""#));
553+
assert!(result.contains(r#"src="//test.example.com/image2.jpg""#));
525554
assert!(result.contains(r#"action="https://test.example.com/submit""#));
555+
assert!(result.contains(r#"action="//test.example.com/submit2""#));
526556
assert!(!result.contains("origin.example.com"));
527557
}
528558

0 commit comments

Comments
 (0)