diff --git a/crates/common/src/html_processor.rs b/crates/common/src/html_processor.rs
index 9473e8e..5325a1e 100644
--- a/crates/common/src/html_processor.rs
+++ b/crates/common/src/html_processor.rs
@@ -4,7 +4,8 @@
use std::cell::Cell;
use std::rc::Rc;
-use lol_html::{element, html_content::ContentType, Settings as RewriterSettings};
+use lol_html::{element, html_content::ContentType, text, Settings as RewriterSettings};
+use regex::Regex;
use crate::settings::Settings;
use crate::streaming_processor::{HtmlRewriterAdapter, StreamProcessor};
@@ -17,6 +18,8 @@ pub struct HtmlProcessorConfig {
pub request_host: String,
pub request_scheme: String,
pub enable_prebid: bool,
+ pub nextjs_enabled: bool,
+ pub nextjs_attributes: Vec,
}
impl HtmlProcessorConfig {
@@ -32,6 +35,8 @@ impl HtmlProcessorConfig {
request_host: request_host.to_string(),
request_scheme: request_scheme.to_string(),
enable_prebid: settings.prebid.auto_configure,
+ nextjs_enabled: settings.publisher.nextjs.enabled,
+ nextjs_attributes: settings.publisher.nextjs.rewrite_attributes.clone(),
}
}
}
@@ -65,6 +70,39 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
fn protocol_relative_replacement(&self) -> String {
format!("//{}", self.request_host)
}
+
+    /// Rewrites origin URLs stored under the configured keys of a Next.js script payload.
+    ///
+    /// For every non-empty key in `attributes`, a value that starts with `http://`,
+    /// `https://` or `//` followed by the origin host is pointed at the request host
+    /// instead. Keys and values may be wrapped in plain (`"`) or backslash-escaped (`\"`)
+    /// quotes, so JSON embedded inside a JavaScript string literal is handled as well.
+    ///
+    /// Returns `Some(rewritten)` when the text changed and `None` otherwise.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the generated pattern fails to compile. Keys and host are escaped, so this
+    /// is only expected for an attribute list large enough to hit the regex size limit.
+    fn rewrite_nextjs_values(&self, content: &str, attributes: &[String]) -> Option<String> {
+        let keys: Vec<String> = attributes
+            .iter()
+            // An empty key would reduce the pattern to `":"<origin>` and match every key.
+            .filter(|attribute| !attribute.is_empty())
+            .map(|attribute| {
+                // The leading quote is optional (unquoted JS keys), so without a word
+                // boundary `url` would also match the tail of `baseurl` or `thumbnail_url`.
+                // `\b` is only meaningful in front of a word character.
+                let boundary = if attribute.starts_with(|c: char| c.is_alphanumeric() || c == '_') {
+                    r"\b"
+                } else {
+                    ""
+                };
+                format!("{boundary}{}", regex::escape(attribute))
+            })
+            .collect();
+        if keys.is_empty() {
+            return None;
+        }
+
+        // One alternation: a single compile and a single scan instead of one per key.
+        // NOTE(review): the pattern is still rebuilt on every call; caching the compiled
+        // regex next to the patterns would avoid that.
+        let pattern = format!(
+            r#"(?P<prefix>(?:\\*")?(?:{keys})(?:\\*")?:\\*")(?P<scheme>https?://|//){origin}"#,
+            keys = keys.join("|"),
+            origin = regex::escape(&self.origin_host)
+        );
+        let matcher = Regex::new(&pattern).expect("valid Next.js rewrite regex");
+
+        let rewritten = matcher.replace_all(content, |caps: &regex::Captures| {
+            // Protocol-relative values stay protocol-relative; absolute ones take the
+            // request scheme via `replacement_url()`.
+            let replacement = if &caps["scheme"] == "//" {
+                format!("//{}", self.request_host)
+            } else {
+                self.replacement_url()
+            };
+            format!("{}{}", &caps["prefix"], replacement)
+        });
+
+        // Compare the text rather than trusting `Cow::Owned`: a match whose replacement
+        // equals the original (request host == origin host) is not a change.
+        (rewritten != content).then(|| rewritten.into_owned())
+    }
}
let patterns = Rc::new(UrlPatterns {
@@ -73,6 +111,8 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
request_scheme: config.request_scheme.clone(),
});
+ let nextjs_attributes = Rc::new(config.nextjs_attributes.clone());
+
let injected_tsjs = Rc::new(Cell::new(false));
fn is_prebid_script_url(url: &str) -> bool {
@@ -85,119 +125,150 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
)
}
- let rewriter_settings = RewriterSettings {
- element_content_handlers: vec![
- // Inject tsjs once at the start of <head>
- element!("head", {
- let injected_tsjs = injected_tsjs.clone();
- move |el| {
- if !injected_tsjs.get() {
- let loader = tsjs::core_script_tag();
- el.prepend(&loader, ContentType::Html);
- injected_tsjs.set(true);
- }
- Ok(())
- }
- }),
- // Replace URLs in href attributes
- element!("[href]", {
- let patterns = patterns.clone();
- let rewrite_prebid = config.enable_prebid;
- move |el| {
- if let Some(href) = el.get_attribute("href") {
- // If Prebid auto-config is enabled and this looks like a Prebid script href, rewrite to our extension
- if rewrite_prebid && is_prebid_script_url(&href) {
- let ext_src = tsjs::ext_script_src();
- el.set_attribute("href", &ext_src)?;
- } else {
- let new_href = href
- .replace(&patterns.https_origin(), &patterns.replacement_url())
- .replace(&patterns.http_origin(), &patterns.replacement_url());
- if new_href != href {
- el.set_attribute("href", &new_href)?;
- }
- }
- }
- Ok(())
+ let mut element_content_handlers = vec![
+ // Inject tsjs once at the start of <head>
+ element!("head", {
+ let injected_tsjs = injected_tsjs.clone();
+ move |el| {
+ if !injected_tsjs.get() {
+ let loader = tsjs::core_script_tag();
+ el.prepend(&loader, ContentType::Html);
+ injected_tsjs.set(true);
}
- }),
- // Replace URLs in src attributes
- element!("[src]", {
- let patterns = patterns.clone();
- let rewrite_prebid = config.enable_prebid;
- move |el| {
- if let Some(src) = el.get_attribute("src") {
- if rewrite_prebid && is_prebid_script_url(&src) {
- let ext_src = tsjs::ext_script_src();
- el.set_attribute("src", &ext_src)?;
- } else {
- let new_src = src
- .replace(&patterns.https_origin(), &patterns.replacement_url())
- .replace(&patterns.http_origin(), &patterns.replacement_url());
- if new_src != src {
- el.set_attribute("src", &new_src)?;
- }
+ Ok(())
+ }
+ }),
+ // Replace URLs in href attributes
+ element!("[href]", {
+ let patterns = patterns.clone();
+ let rewrite_prebid = config.enable_prebid;
+ move |el| {
+ if let Some(href) = el.get_attribute("href") {
+ // If Prebid auto-config is enabled and this looks like a Prebid script href, rewrite to our extension
+ if rewrite_prebid && is_prebid_script_url(&href) {
+ let ext_src = tsjs::ext_script_src();
+ el.set_attribute("href", &ext_src)?;
+ } else {
+ let new_href = href
+ .replace(&patterns.https_origin(), &patterns.replacement_url())
+ .replace(&patterns.http_origin(), &patterns.replacement_url());
+ if new_href != href {
+ el.set_attribute("href", &new_href)?;
}
}
- Ok(())
}
- }),
- // Replace URLs in action attributes
- element!("[action]", {
- let patterns = patterns.clone();
- move |el| {
- if let Some(action) = el.get_attribute("action") {
- let new_action = action
+ Ok(())
+ }
+ }),
+ // Replace URLs in src attributes
+ element!("[src]", {
+ let patterns = patterns.clone();
+ let rewrite_prebid = config.enable_prebid;
+ move |el| {
+ if let Some(src) = el.get_attribute("src") {
+ if rewrite_prebid && is_prebid_script_url(&src) {
+ let ext_src = tsjs::ext_script_src();
+ el.set_attribute("src", &ext_src)?;
+ } else {
+ let new_src = src
.replace(&patterns.https_origin(), &patterns.replacement_url())
.replace(&patterns.http_origin(), &patterns.replacement_url());
- if new_action != action {
- el.set_attribute("action", &new_action)?;
+ if new_src != src {
+ el.set_attribute("src", &new_src)?;
}
}
- Ok(())
}
- }),
- // Replace URLs in srcset attributes (for responsive images)
- element!("[srcset]", {
- let patterns = patterns.clone();
- move |el| {
- if let Some(srcset) = el.get_attribute("srcset") {
- let new_srcset = srcset
- .replace(&patterns.https_origin(), &patterns.replacement_url())
- .replace(&patterns.http_origin(), &patterns.replacement_url())
- .replace(
- &patterns.protocol_relative_origin(),
- &patterns.protocol_relative_replacement(),
- )
- .replace(&patterns.origin_host, &patterns.request_host);
-
- if new_srcset != srcset {
- el.set_attribute("srcset", &new_srcset)?;
- }
+ Ok(())
+ }
+ }),
+ // Replace URLs in action attributes
+ element!("[action]", {
+ let patterns = patterns.clone();
+ move |el| {
+ if let Some(action) = el.get_attribute("action") {
+ let new_action = action
+ .replace(&patterns.https_origin(), &patterns.replacement_url())
+ .replace(&patterns.http_origin(), &patterns.replacement_url());
+ if new_action != action {
+ el.set_attribute("action", &new_action)?;
}
- Ok(())
}
- }),
- // Replace URLs in imagesrcset attributes (for link preload)
- element!("[imagesrcset]", {
- let patterns = patterns.clone();
- move |el| {
- if let Some(imagesrcset) = el.get_attribute("imagesrcset") {
- let new_imagesrcset = imagesrcset
- .replace(&patterns.https_origin(), &patterns.replacement_url())
- .replace(&patterns.http_origin(), &patterns.replacement_url())
- .replace(
- &patterns.protocol_relative_origin(),
- &patterns.protocol_relative_replacement(),
- );
- if new_imagesrcset != imagesrcset {
- el.set_attribute("imagesrcset", &new_imagesrcset)?;
- }
+ Ok(())
+ }
+ }),
+ // Replace URLs in srcset attributes (for responsive images)
+ element!("[srcset]", {
+ let patterns = patterns.clone();
+ move |el| {
+ if let Some(srcset) = el.get_attribute("srcset") {
+ let new_srcset = srcset
+ .replace(&patterns.https_origin(), &patterns.replacement_url())
+ .replace(&patterns.http_origin(), &patterns.replacement_url())
+ .replace(
+ &patterns.protocol_relative_origin(),
+ &patterns.protocol_relative_replacement(),
+ )
+ .replace(&patterns.origin_host, &patterns.request_host);
+
+ if new_srcset != srcset {
+ el.set_attribute("srcset", &new_srcset)?;
+ }
+ }
+ Ok(())
+ }
+ }),
+ // Replace URLs in imagesrcset attributes (for link preload)
+ element!("[imagesrcset]", {
+ let patterns = patterns.clone();
+ move |el| {
+ if let Some(imagesrcset) = el.get_attribute("imagesrcset") {
+ let new_imagesrcset = imagesrcset
+ .replace(&patterns.https_origin(), &patterns.replacement_url())
+ .replace(&patterns.http_origin(), &patterns.replacement_url())
+ .replace(
+ &patterns.protocol_relative_origin(),
+ &patterns.protocol_relative_replacement(),
+ );
+ if new_imagesrcset != imagesrcset {
+ el.set_attribute("imagesrcset", &new_imagesrcset)?;
}
- Ok(())
}
- }),
- ],
+ Ok(())
+ }
+ }),
+ ];
+
+    // The attribute handlers above never look inside script bodies. When the Next.js
+    // integration is enabled and at least one key is configured, also rewrite origin URLs
+    // found in Next.js script payloads.
+    //
+    // NOTE(review): lol_html may deliver one script body to a text handler as several
+    // chunks. Each chunk is rewritten on its own here, so a URL (or the `self.__next_f`
+    // marker) that straddles a chunk boundary is missed. Buffering until
+    // `last_in_text_node()` would close that gap.
+    if config.nextjs_enabled && !nextjs_attributes.is_empty() {
+        // The script carrying `id="__NEXT_DATA__"`.
+        element_content_handlers.push(text!("script#__NEXT_DATA__", {
+            let patterns = patterns.clone();
+            let attributes = nextjs_attributes.clone();
+            move |text| {
+                let content = text.as_str();
+                if let Some(rewritten) = patterns.rewrite_nextjs_values(content, &attributes) {
+                    // Insert verbatim. `ContentType::Text` would HTML-escape `&`, `<` and
+                    // `>`, and entities are not decoded inside <script>, so the payload
+                    // would be corrupted.
+                    text.replace(&rewritten, ContentType::Html);
+                }
+                Ok(())
+            }
+        }));
+
+        // Every script chunk that mentions `self.__next_f`; all other script text passes
+        // through untouched.
+        element_content_handlers.push(text!("script", {
+            let patterns = patterns.clone();
+            let attributes = nextjs_attributes.clone();
+            move |text| {
+                let content = text.as_str();
+                if !content.contains("self.__next_f") {
+                    return Ok(());
+                }
+                if let Some(rewritten) = patterns.rewrite_nextjs_values(content, &attributes) {
+                    // Verbatim insertion for the same reason as above.
+                    text.replace(&rewritten, ContentType::Html);
+                }
+                Ok(())
+            }
+        }));
+    }
+
+ let rewriter_settings = RewriterSettings {
+ element_content_handlers,
// TODO: Consider adding text content replacement if needed with settings
// // Replace URLs in text content
@@ -238,6 +309,8 @@ mod tests {
request_host: "test.example.com".to_string(),
request_scheme: "https".to_string(),
enable_prebid: false,
+ nextjs_enabled: false,
+ nextjs_attributes: vec!["href".to_string(), "link".to_string(), "url".to_string()],
}
}
@@ -318,6 +391,137 @@ mod tests {
assert!(processed.contains("/static/tsjs=tsjs-core.min.js"));
}
+    /// With the flag on, `href` values in `__NEXT_DATA__` that point at the origin are
+    /// rewritten to the request host, while keys that are not configured stay as they are.
+    #[test]
+    fn test_rewrites_nextjs_script_when_enabled() {
+        // Two rewritable `href` values (https and http) plus two keys outside the
+        // configured list.
+        let html = r#"<html><body>
+            <script id="__NEXT_DATA__" type="application/json">
+            {"props":{"pageProps":{"primary":{"href":"https://origin.example.com/reviews"},"secondary":{"href":"http://origin.example.com/sign-in"},"fallbackHref":"http://origin.example.com/legacy","protoRelative":"//origin.example.com/assets/logo.png"}}}
+            </script>
+        </body></html>"#;
+
+        let mut config = create_test_config();
+        config.nextjs_enabled = true;
+        config.nextjs_attributes = vec!["href".to_string(), "link".to_string(), "url".to_string()];
+        let processor = create_html_processor(config);
+        let pipeline_config = PipelineConfig {
+            input_compression: Compression::None,
+            output_compression: Compression::None,
+            chunk_size: 8192,
+        };
+        let mut pipeline = StreamingPipeline::new(pipeline_config, processor);
+
+        let mut output = Vec::new();
+        pipeline
+            .process(Cursor::new(html.as_bytes()), &mut output)
+            .unwrap();
+        let processed = String::from_utf8_lossy(&output);
+
+        assert!(
+            processed.contains(r#""href":"https://test.example.com/reviews""#),
+            "Should rewrite https Next.js href values"
+        );
+        assert!(
+            processed.contains(r#""href":"https://test.example.com/sign-in""#),
+            "Should rewrite http Next.js href values"
+        );
+        assert!(
+            processed.contains(r#""fallbackHref":"http://origin.example.com/legacy""#),
+            "Should leave other fields untouched"
+        );
+        assert!(
+            processed.contains(r#""protoRelative":"//origin.example.com/assets/logo.png""#),
+            "Should not rewrite non-href keys"
+        );
+        assert!(
+            !processed.contains("\"href\":\"https://origin.example.com/reviews\""),
+            "Should remove origin https href"
+        );
+        assert!(
+            !processed.contains("\"href\":\"http://origin.example.com/sign-in\""),
+            "Should remove origin http href"
+        );
+    }
+
+    /// Scripts that push to `self.__next_f` are rewritten too, both when the JSON is
+    /// embedded in a string literal (backslash-escaped quotes) and when it is plain.
+    #[test]
+    fn test_rewrites_nextjs_stream_payload() {
+        // First script: JSON inside a JS string, so every quote is escaped.
+        // Second script: plain object with one rewritable key and one untouched key.
+        let html = r#"<html><body>
+            <script>self.__next_f.push([1,"{\"href\":\"https://origin.example.com/dashboard\",\"link\":\"https://origin.example.com/api-test\"}"])</script>
+            <script>self.__next_f.push([1,{"href":"https://origin.example.com/secondary","dataHost":"https://origin.example.com/api"}])</script>
+        </body></html>"#;
+
+        let mut config = create_test_config();
+        config.nextjs_enabled = true;
+        config.nextjs_attributes = vec!["href".to_string(), "link".to_string(), "url".to_string()];
+        let processor = create_html_processor(config);
+        let pipeline_config = PipelineConfig {
+            input_compression: Compression::None,
+            output_compression: Compression::None,
+            chunk_size: 8192,
+        };
+        let mut pipeline = StreamingPipeline::new(pipeline_config, processor);
+
+        let mut output = Vec::new();
+        pipeline
+            .process(Cursor::new(html.as_bytes()), &mut output)
+            .unwrap();
+        let processed = String::from_utf8_lossy(&output);
+        // Drop the escaping backslashes so escaped and plain payloads compare alike.
+        let normalized = processed.replace('\\', "");
+        assert!(
+            normalized.contains("\"href\":\"https://test.example.com/dashboard\""),
+            "Should rewrite escaped href sequences inside streamed payloads. Content: {}",
+            normalized
+        );
+        assert!(
+            normalized.contains("\"href\":\"https://test.example.com/secondary\""),
+            "Should rewrite plain href attributes inside streamed payloads"
+        );
+        assert!(
+            normalized.contains("\"link\":\"https://test.example.com/api-test\""),
+            "Should rewrite additional configured attributes like link"
+        );
+        assert!(
+            processed.contains("\"dataHost\":\"https://origin.example.com/api\""),
+            "Should leave non-href properties untouched"
+        );
+    }
+
+    /// With the default test config (flag off) Next.js script content is left alone.
+    #[test]
+    fn test_nextjs_rewrite_respects_flag() {
+        // A `__NEXT_DATA__` document that would be rewritten if the flag were on.
+        let html = r#"<html><body>
+            <script id="__NEXT_DATA__" type="application/json">
+            {"props":{"pageProps":{"primary":{"href":"https://origin.example.com/reviews"}}}}
+            </script>
+        </body></html>"#;
+
+        let config = create_test_config();
+        let processor = create_html_processor(config);
+        let pipeline_config = PipelineConfig {
+            input_compression: Compression::None,
+            output_compression: Compression::None,
+            chunk_size: 8192,
+        };
+        let mut pipeline = StreamingPipeline::new(pipeline_config, processor);
+
+        let mut output = Vec::new();
+        pipeline
+            .process(Cursor::new(html.as_bytes()), &mut output)
+            .unwrap();
+        let processed = String::from_utf8_lossy(&output);
+
+        assert!(
+            processed.contains("origin.example.com"),
+            "Should leave Next.js data untouched when disabled"
+        );
+        assert!(
+            !processed.contains("test.example.com/reviews"),
+            "Should not rewrite Next.js data when flag is off"
+        );
+    }
+
#[test]
fn test_create_html_processor_url_replacement() {
let config = create_test_config();
@@ -365,6 +569,15 @@ mod tests {
assert_eq!(config.request_host, "proxy.example.com");
assert_eq!(config.request_scheme, "https");
assert!(config.enable_prebid); // Uses default true
+ assert!(
+ !config.nextjs_enabled,
+ "Next.js rewrites should default to disabled"
+ );
+ assert_eq!(
+ config.nextjs_attributes,
+ vec!["href".to_string(), "link".to_string(), "url".to_string()],
+ "Should default to rewriting href/link/url attributes"
+ );
}
#[test]
diff --git a/crates/common/src/settings.rs b/crates/common/src/settings.rs
index b46ba1c..2c7940d 100644
--- a/crates/common/src/settings.rs
+++ b/crates/common/src/settings.rs
@@ -3,7 +3,10 @@ use core::str;
use config::{Config, Environment, File, FileFormat};
use error_stack::{Report, ResultExt};
use regex::Regex;
-use serde::{de::DeserializeOwned, Deserialize, Deserializer, Serialize};
+use serde::{
+ de::{DeserializeOwned, IntoDeserializer},
+ Deserialize, Deserializer, Serialize,
+};
use serde_json::Value as JsonValue;
use std::collections::HashMap;
use std::sync::OnceLock;
@@ -23,6 +26,9 @@ pub struct Publisher {
/// Secret used to encrypt/decrypt proxied URLs in `/first-party/proxy`.
/// Keep this secret stable to allow existing links to decode.
pub proxy_secret: String,
+ #[serde(default)]
+ #[validate(nested)]
+ pub nextjs: NextJs,
}
impl Publisher {
@@ -37,6 +43,7 @@ impl Publisher {
/// cookie_domain: ".example.com".to_string(),
/// origin_url: "https://origin.example.com:8080".to_string(),
/// proxy_secret: "proxy-secret".to_string(),
+ /// nextjs: Default::default(),
/// };
/// assert_eq!(publisher.origin_host(), "origin.example.com:8080");
/// ```
@@ -79,6 +86,42 @@ fn default_auto_configure() -> bool {
true
}
+/// Settings for rewriting origin URLs embedded in Next.js script payloads.
+#[derive(Debug, Deserialize, Serialize, Validate)]
+pub struct NextJs {
+    /// Turns the Next.js rewriting on; `false` when omitted.
+    #[serde(default)]
+    pub enabled: bool,
+    /// Payload keys whose URL values are rewritten. Falls back to
+    /// `default_nextjs_attributes()` when omitted.
+    #[serde(
+        default = "default_nextjs_attributes",
+        deserialize_with = "deserialize_nextjs_attributes"
+    )]
+    pub rewrite_attributes: Vec<String>,
+}
+
+/// Keys rewritten when `rewrite_attributes` is not configured: `href`, `link` and `url`.
+fn default_nextjs_attributes() -> Vec<String> {
+    vec!["href".to_string(), "link".to_string(), "url".to_string()]
+}
+
+/// Defaults: rewriting disabled, key list taken from `default_nextjs_attributes()`.
+impl Default for NextJs {
+    fn default() -> Self {
+        let rewrite_attributes = default_nextjs_attributes();
+        NextJs {
+            enabled: false,
+            rewrite_attributes,
+        }
+    }
+}
+
+/// Deserializes `rewrite_attributes`, treating an explicit null like an omitted value.
+///
+/// A present value is handed to `vec_from_seq_or_map`; a null yields
+/// `default_nextjs_attributes()`.
+///
+/// # Errors
+///
+/// Returns the deserializer's error when the input cannot be read as an optional JSON
+/// value, or when `vec_from_seq_or_map` rejects the value.
+fn deserialize_nextjs_attributes<'de, D>(deserializer: D) -> Result<Vec<String>, D::Error>
+where
+    D: Deserializer<'de>,
+{
+    let value = Option::<JsonValue>::deserialize(deserializer)?;
+    match value {
+        // Re-deserialize the buffered JSON value and translate its error into `D::Error`.
+        Some(json) => vec_from_seq_or_map(json.into_deserializer())
+            .map_err(<D::Error as serde::de::Error>::custom),
+        None => Ok(default_nextjs_attributes()),
+    }
+}
+
#[allow(unused)]
#[derive(Debug, Default, Deserialize, Serialize, Validate)]
pub struct Synthetic {
@@ -130,6 +173,7 @@ pub struct Settings {
pub publisher: Publisher,
#[validate(nested)]
pub prebid: Prebid,
+ #[serde(default)]
#[validate(nested)]
pub synthetic: Synthetic,
#[serde(default, deserialize_with = "vec_from_seq_or_map")]
@@ -291,6 +335,15 @@ mod tests {
assert!(!settings.publisher.origin_url.is_empty());
assert!(!settings.prebid.server_url.is_empty());
+ assert!(
+ !settings.publisher.nextjs.enabled,
+ "Next.js URL rewriting should default to disabled"
+ );
+ assert_eq!(
+ settings.publisher.nextjs.rewrite_attributes,
+ vec!["href".to_string(), "link".to_string(), "url".to_string()],
+ "Next.js rewrite attributes should default to href/link/url"
+ );
assert!(!settings.synthetic.counter_store.is_empty());
assert!(!settings.synthetic.opid_store.is_empty());
@@ -310,6 +363,15 @@ mod tests {
settings.prebid.server_url,
"https://test-prebid.com/openrtb2/auction"
);
+ assert!(
+ !settings.publisher.nextjs.enabled,
+ "Next.js URL rewriting should default to disabled"
+ );
+ assert_eq!(
+ settings.publisher.nextjs.rewrite_attributes,
+ vec!["href".to_string(), "link".to_string(), "url".to_string()],
+ "Next.js rewrite attributes should default to href/link/url"
+ );
assert_eq!(settings.publisher.domain, "test-publisher.com");
assert_eq!(settings.publisher.cookie_domain, ".test-publisher.com");
assert_eq!(
@@ -568,6 +630,7 @@ mod tests {
cookie_domain: ".example.com".to_string(),
origin_url: "https://origin.example.com:8080".to_string(),
proxy_secret: "test-secret".to_string(),
+ nextjs: NextJs::default(),
};
assert_eq!(publisher.origin_host(), "origin.example.com:8080");
@@ -577,6 +640,7 @@ mod tests {
cookie_domain: ".example.com".to_string(),
origin_url: "https://origin.example.com".to_string(),
proxy_secret: "test-secret".to_string(),
+ nextjs: NextJs::default(),
};
assert_eq!(publisher.origin_host(), "origin.example.com");
@@ -586,6 +650,7 @@ mod tests {
cookie_domain: ".example.com".to_string(),
origin_url: "http://localhost:9090".to_string(),
proxy_secret: "test-secret".to_string(),
+ nextjs: NextJs::default(),
};
assert_eq!(publisher.origin_host(), "localhost:9090");
@@ -595,6 +660,7 @@ mod tests {
cookie_domain: ".example.com".to_string(),
origin_url: "localhost:9090".to_string(),
proxy_secret: "test-secret".to_string(),
+ nextjs: NextJs::default(),
};
assert_eq!(publisher.origin_host(), "localhost:9090");
@@ -604,6 +670,7 @@ mod tests {
cookie_domain: ".example.com".to_string(),
origin_url: "http://192.168.1.1:8080".to_string(),
proxy_secret: "test-secret".to_string(),
+ nextjs: NextJs::default(),
};
assert_eq!(publisher.origin_host(), "192.168.1.1:8080");
@@ -613,6 +680,7 @@ mod tests {
cookie_domain: ".example.com".to_string(),
origin_url: "http://[::1]:8080".to_string(),
proxy_secret: "test-secret".to_string(),
+ nextjs: NextJs::default(),
};
assert_eq!(publisher.origin_host(), "[::1]:8080");
}
diff --git a/trusted-server.toml b/trusted-server.toml
index 50ae25a..6ec6eae 100644
--- a/trusted-server.toml
+++ b/trusted-server.toml
@@ -9,6 +9,10 @@ cookie_domain = ".test-publisher.com"
origin_url = "https://origin.test-publisher.com"
proxy_secret = "change-me-proxy-secret"
+# Rewrites origin URLs inside Next.js script payloads; off unless enabled.
+[publisher.nextjs]
+enabled = false
+# Payload keys whose URL values are rewritten.
+rewrite_attributes = ["href", "link", "url"]
+
[prebid]
server_url = "http://68.183.113.79:8000"
timeout_ms = 1000