Skip to content

Commit dd02e05

Browse files
committed
Additional refactoring
1 parent 7c645f3 commit dd02e05

File tree

14 files changed

+1755
-617
lines changed

14 files changed

+1755
-617
lines changed

crates/common/src/host_rewrite.rs

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
/// Rewrite bare host occurrences (e.g. `origin.example.com/news`) only when the match is a full
/// hostname token, not part of a larger hostname like `cdn.origin.example.com`.
///
/// This is used by both HTML (`__next_f` payloads) and Flight (`text/x-component`) rewriting to
/// avoid corrupting unrelated hostnames.
///
/// An occurrence of `origin_host` is a full token when it touches the start/end of `text` or the
/// neighbouring byte on each side is not a hostname byte (ASCII alphanumeric, `.`, `-` or `:`).
/// Every such occurrence is replaced with `request_host`; all other text is copied unchanged.
///
/// Returns `None` when either host is empty or when no occurrence qualified, so callers can keep
/// using the original `text` without an extra allocation being handed back.
pub(crate) fn rewrite_bare_host_at_boundaries(
    text: &str,
    origin_host: &str,
    request_host: &str,
) -> Option<String> {
    /// Bytes that may legitimately extend a hostname (or `host:port`) token.
    fn is_host_char(byte: u8) -> bool {
        byte.is_ascii_alphanumeric() || matches!(byte, b'.' | b'-' | b':')
    }

    // Width in bytes of the first character of `origin_host`. `?` also rejects an empty origin.
    // A rejected match must be skipped by one whole character: stepping a single byte would land
    // inside a multi-byte UTF-8 character and make the slicing below panic.
    let first_char_len = origin_host.chars().next()?.len_utf8();

    // Cheap exit before allocating the output buffer.
    if request_host.is_empty() || !text.contains(origin_host) {
        return None;
    }

    let origin_len = origin_host.len();
    let bytes = text.as_bytes();
    let mut out = String::with_capacity(text.len());
    // Start of the not-yet-copied tail of `text`; always a char boundary.
    let mut search = 0;
    let mut replaced_any = false;

    while let Some(rel) = text[search..].find(origin_host) {
        let pos = search + rel;
        let end = pos + origin_len;

        // Inspecting single bytes is safe: UTF-8 continuation/lead bytes are never ASCII, so a
        // neighbouring non-ASCII character always counts as a boundary.
        let before_ok = pos == 0 || !is_host_char(bytes[pos - 1]);
        let after_ok = end == bytes.len() || !is_host_char(bytes[end]);

        if before_ok && after_ok {
            out.push_str(&text[search..pos]);
            out.push_str(request_host);
            replaced_any = true;
            search = end;
        } else {
            // Not a standalone token: keep the text up to and including the first character of
            // this match, then resume right after it so overlapping candidates are still found.
            let resume = pos + first_char_len;
            out.push_str(&text[search..resume]);
            search = resume;
        }
    }

    if !replaced_any {
        return None;
    }

    out.push_str(&text[search..]);
    Some(out)
}

crates/common/src/html_processor.rs

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,9 @@ use std::sync::Arc;
99
use lol_html::{element, html_content::ContentType, text, Settings as RewriterSettings};
1010

1111
use crate::integrations::{
12-
AttributeRewriteOutcome, IntegrationAttributeContext, IntegrationHtmlContext,
13-
IntegrationHtmlPostProcessor, IntegrationRegistry, IntegrationScriptContext,
14-
ScriptRewriteAction,
12+
AttributeRewriteOutcome, IntegrationAttributeContext, IntegrationDocumentState,
13+
IntegrationHtmlContext, IntegrationHtmlPostProcessor, IntegrationRegistry,
14+
IntegrationScriptContext, ScriptRewriteAction,
1515
};
1616
use crate::settings::Settings;
1717
use crate::streaming_processor::{HtmlRewriterAdapter, StreamProcessor};
@@ -23,6 +23,7 @@ struct HtmlWithPostProcessing {
2323
origin_host: String,
2424
request_host: String,
2525
request_scheme: String,
26+
document_state: IntegrationDocumentState,
2627
}
2728

2829
impl StreamProcessor for HtmlWithPostProcessing {
@@ -40,6 +41,7 @@ impl StreamProcessor for HtmlWithPostProcessing {
4041
request_host: &self.request_host,
4142
request_scheme: &self.request_scheme,
4243
origin_host: &self.origin_host,
44+
document_state: &self.document_state,
4345
};
4446

4547
// Preflight to avoid allocating a `String` unless at least one post-processor wants to run.
@@ -77,6 +79,7 @@ impl StreamProcessor for HtmlWithPostProcessing {
7779

7880
fn reset(&mut self) {
7981
self.inner.reset();
82+
self.document_state.clear();
8083
}
8184
}
8285

@@ -110,6 +113,7 @@ impl HtmlProcessorConfig {
110113
/// Create an HTML processor with URL replacement and optional Prebid injection
111114
pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcessor {
112115
let post_processors = config.integrations.html_post_processors();
116+
let document_state = IntegrationDocumentState::default();
113117

114118
// Simplified URL patterns structure - stores only core data and generates variants on-demand
115119
struct UrlPatterns {
@@ -404,15 +408,19 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
404408
let selector = script_rewriter.selector();
405409
let rewriter = script_rewriter.clone();
406410
let patterns = patterns.clone();
411+
let document_state = document_state.clone();
407412
element_content_handlers.push(text!(selector, {
408413
let rewriter = rewriter.clone();
409414
let patterns = patterns.clone();
415+
let document_state = document_state.clone();
410416
move |text| {
411417
let ctx = IntegrationScriptContext {
412418
selector,
413419
request_host: &patterns.request_host,
414420
request_scheme: &patterns.request_scheme,
415421
origin_host: &patterns.origin_host,
422+
is_last_in_text_node: text.last_in_text_node(),
423+
document_state: &document_state,
416424
};
417425
match rewriter.rewrite(text.as_str(), &ctx) {
418426
ScriptRewriteAction::Keep => {}
@@ -439,6 +447,7 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
439447
origin_host: config.origin_host,
440448
request_host: config.request_host,
441449
request_scheme: config.request_scheme,
450+
document_state,
442451
}
443452
}
444453

crates/common/src/integrations/mod.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@ pub mod testlight;
1010

1111
pub use registry::{
1212
AttributeRewriteAction, AttributeRewriteOutcome, IntegrationAttributeContext,
13-
IntegrationAttributeRewriter, IntegrationEndpoint, IntegrationHtmlContext,
14-
IntegrationHtmlPostProcessor, IntegrationMetadata, IntegrationProxy, IntegrationRegistration,
15-
IntegrationRegistrationBuilder, IntegrationRegistry, IntegrationScriptContext,
16-
IntegrationScriptRewriter, ScriptRewriteAction,
13+
IntegrationAttributeRewriter, IntegrationDocumentState, IntegrationEndpoint,
14+
IntegrationHtmlContext, IntegrationHtmlPostProcessor, IntegrationMetadata, IntegrationProxy,
15+
IntegrationRegistration, IntegrationRegistrationBuilder, IntegrationRegistry,
16+
IntegrationScriptContext, IntegrationScriptRewriter, ScriptRewriteAction,
1717
};
1818

1919
type IntegrationBuilder = fn(&Settings) -> Option<IntegrationRegistration>;

0 commit comments

Comments
 (0)