Skip to content

Commit 1ebda38

Browse files
committed
Make list of attributes to rewrite for Next.js configurable
1 parent 7ce1831 commit 1ebda38

File tree

3 files changed

+106
-42
lines changed

crates/common/src/html_processor.rs

Lines changed: 56 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ use std::cell::Cell;
55
use std::rc::Rc;
66

77
use lol_html::{element, html_content::ContentType, text, Settings as RewriterSettings};
8+
use regex::Regex;
89

910
use crate::settings::Settings;
1011
use crate::streaming_processor::{HtmlRewriterAdapter, StreamProcessor};
@@ -17,7 +18,8 @@ pub struct HtmlProcessorConfig {
1718
pub request_host: String,
1819
pub request_scheme: String,
1920
pub enable_prebid: bool,
20-
pub nextjs_rewrite_urls: bool,
21+
pub nextjs_enabled: bool,
22+
pub nextjs_attributes: Vec<String>,
2123
}
2224

2325
impl HtmlProcessorConfig {
@@ -33,7 +35,8 @@ impl HtmlProcessorConfig {
3335
request_host: request_host.to_string(),
3436
request_scheme: request_scheme.to_string(),
3537
enable_prebid: settings.prebid.auto_configure,
36-
nextjs_rewrite_urls: settings.publisher.nextjs.rewrite_urls,
38+
nextjs_enabled: settings.publisher.nextjs.enabled,
39+
nextjs_attributes: settings.publisher.nextjs.rewrite_attributes.clone(),
3740
}
3841
}
3942
}
@@ -68,29 +71,32 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
6871
format!("//{}", self.request_host)
6972
}
7073

71-
fn rewrite_nextjs_hrefs(&self, content: &str) -> Option<String> {
74+
fn rewrite_nextjs_values(&self, content: &str, attributes: &[String]) -> Option<String> {
7275
let mut rewritten = content.to_string();
7376
let mut changed = false;
74-
for prefix in ["\"href\":\"", "\\\"href\\\":\\\""] {
75-
let https_pattern = format!("{}https://{}", prefix, self.origin_host);
76-
let http_pattern = format!("{}http://{}", prefix, self.origin_host);
77-
let proto_pattern = format!("{}//{}", prefix, self.origin_host);
78-
79-
let href_replacement =
80-
format!("{}{}://{}", prefix, self.request_scheme, self.request_host);
81-
let proto_replacement = format!("{}//{}", prefix, self.request_host);
82-
83-
let new_rewritten = rewritten
84-
.replace(&https_pattern, &href_replacement)
85-
.replace(&http_pattern, &href_replacement)
86-
.replace(&proto_pattern, &proto_replacement);
87-
88-
if new_rewritten != rewritten {
77+
let escaped_origin = regex::escape(&self.origin_host);
78+
for attribute in attributes {
79+
let escaped_attr = regex::escape(attribute);
80+
let pattern = format!(
81+
r#"(?P<prefix>(?:\\*")?{attr}(?:\\*")?:\\*")(?P<scheme>https?://|//){origin}"#,
82+
attr = escaped_attr,
83+
origin = escaped_origin
84+
);
85+
let regex = Regex::new(&pattern).expect("valid Next.js rewrite regex");
86+
let new_value = regex.replace_all(&rewritten, |caps: &regex::Captures| {
87+
let scheme = &caps["scheme"];
88+
let replacement = if scheme == "//" {
89+
format!("//{}", self.request_host)
90+
} else {
91+
self.replacement_url()
92+
};
93+
format!("{}{}", &caps["prefix"], replacement)
94+
});
95+
if new_value != rewritten {
8996
changed = true;
90-
rewritten = new_rewritten;
97+
rewritten = new_value.into_owned();
9198
}
9299
}
93-
94100
if changed {
95101
Some(rewritten)
96102
} else {
@@ -105,6 +111,8 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
105111
request_scheme: config.request_scheme.clone(),
106112
});
107113

114+
let nextjs_attributes = Rc::new(config.nextjs_attributes.clone());
115+
108116
let injected_tsjs = Rc::new(Cell::new(false));
109117

110118
fn is_prebid_script_url(url: &str) -> bool {
@@ -230,12 +238,13 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
230238
}),
231239
];
232240

233-
if config.nextjs_rewrite_urls {
241+
if config.nextjs_enabled && !nextjs_attributes.is_empty() {
234242
element_content_handlers.push(text!("script#__NEXT_DATA__", {
235243
let patterns = patterns.clone();
244+
let attributes = nextjs_attributes.clone();
236245
move |text| {
237246
let content = text.as_str();
238-
if let Some(rewritten) = patterns.rewrite_nextjs_hrefs(content) {
247+
if let Some(rewritten) = patterns.rewrite_nextjs_values(content, &attributes) {
239248
text.replace(&rewritten, ContentType::Text);
240249
}
241250
Ok(())
@@ -244,12 +253,13 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
244253

245254
element_content_handlers.push(text!("script", {
246255
let patterns = patterns.clone();
256+
let attributes = nextjs_attributes.clone();
247257
move |text| {
248258
let content = text.as_str();
249259
if !content.contains("self.__next_f") {
250260
return Ok(());
251261
}
252-
if let Some(rewritten) = patterns.rewrite_nextjs_hrefs(content) {
262+
if let Some(rewritten) = patterns.rewrite_nextjs_values(content, &attributes) {
253263
text.replace(&rewritten, ContentType::Text);
254264
}
255265
Ok(())
@@ -299,7 +309,8 @@ mod tests {
299309
request_host: "test.example.com".to_string(),
300310
request_scheme: "https".to_string(),
301311
enable_prebid: false,
302-
nextjs_rewrite_urls: false,
312+
nextjs_enabled: false,
313+
nextjs_attributes: vec!["href".to_string(), "link".to_string(), "url".to_string()],
303314
}
304315
}
305316

@@ -389,7 +400,8 @@ mod tests {
389400
</body></html>"#;
390401

391402
let mut config = create_test_config();
392-
config.nextjs_rewrite_urls = true;
403+
config.nextjs_enabled = true;
404+
config.nextjs_attributes = vec!["href".to_string(), "link".to_string(), "url".to_string()];
393405
let processor = create_html_processor(config);
394406
let pipeline_config = PipelineConfig {
395407
input_compression: Compression::None,
@@ -403,6 +415,8 @@ mod tests {
403415
.process(Cursor::new(html.as_bytes()), &mut output)
404416
.unwrap();
405417
let processed = String::from_utf8_lossy(&output);
418+
println!("processed={processed}");
419+
println!("processed stream payload: {}", processed);
406420
println!("processed stream payload: {}", processed);
407421

408422
assert!(
@@ -435,12 +449,13 @@ mod tests {
435449
fn test_rewrites_nextjs_stream_payload() {
436450
let html = r#"<html><body>
437451
<script>
438-
self.__next_f.push([1,"chunk", "prefix {\"inner\":\"value\"} \"href\":\"http://origin.example.com/dashboard\", \"href\":\"https://origin.example.com/api-test\" suffix", {"dataHost":"https://origin.example.com/api"}]);
452+
self.__next_f.push([1,"chunk", "prefix {\"inner\":\"value\"} \\\"href\\\":\\\"http://origin.example.com/dashboard\\\", \\\"link\\\":\\\"https://origin.example.com/api-test\\\" suffix", {"href":"http://origin.example.com/secondary","dataHost":"https://origin.example.com/api"}]);
439453
</script>
440454
</body></html>"#;
441455

442456
let mut config = create_test_config();
443-
config.nextjs_rewrite_urls = true;
457+
config.nextjs_enabled = true;
458+
config.nextjs_attributes = vec!["href".to_string(), "link".to_string(), "url".to_string()];
444459
let processor = create_html_processor(config);
445460
let pipeline_config = PipelineConfig {
446461
input_compression: Compression::None,
@@ -454,18 +469,19 @@ mod tests {
454469
.process(Cursor::new(html.as_bytes()), &mut output)
455470
.unwrap();
456471
let processed = String::from_utf8_lossy(&output);
472+
let normalized = processed.replace('\\', "");
457473
assert!(
458-
processed.contains("https://test.example.com/dashboard"),
459-
"Should rewrite URLs inside streamed Next.js payloads"
474+
normalized.contains("\"href\":\"https://test.example.com/dashboard\""),
475+
"Should rewrite escaped href sequences inside streamed payloads. Content: {}",
476+
normalized
460477
);
461478
assert!(
462-
processed.contains("\\\"href\\\":\\\"https://test.example.com/api-test\\\"")
463-
|| processed.contains("\"href\":\"https://test.example.com/api-test\""),
464-
"Should rewrite escaped href sequences inside streamed payloads"
479+
normalized.contains("\"href\":\"https://test.example.com/secondary\""),
480+
"Should rewrite plain href attributes inside streamed payloads"
465481
);
466482
assert!(
467-
!processed.contains("\"href\":\"http://origin.example.com/dashboard\""),
468-
"Should remove origin host references from href fields"
483+
normalized.contains("\"link\":\"https://test.example.com/api-test\""),
484+
"Should rewrite additional configured attributes like link"
469485
);
470486
assert!(
471487
processed.contains("\"dataHost\":\"https://origin.example.com/api\""),
@@ -554,9 +570,14 @@ mod tests {
554570
assert_eq!(config.request_scheme, "https");
555571
assert!(config.enable_prebid); // Uses default true
556572
assert!(
557-
!config.nextjs_rewrite_urls,
573+
!config.nextjs_enabled,
558574
"Next.js rewrites should default to disabled"
559575
);
576+
assert_eq!(
577+
config.nextjs_attributes,
578+
vec!["href".to_string(), "link".to_string(), "url".to_string()],
579+
"Should default to rewriting href/link/url attributes"
580+
);
560581
}
561582

562583
#[test]

crates/common/src/settings.rs

Lines changed: 48 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,10 @@ use core::str;
33
use config::{Config, Environment, File, FileFormat};
44
use error_stack::{Report, ResultExt};
55
use regex::Regex;
6-
use serde::{de::DeserializeOwned, Deserialize, Deserializer, Serialize};
6+
use serde::{
7+
de::{DeserializeOwned, IntoDeserializer},
8+
Deserialize, Deserializer, Serialize,
9+
};
710
use serde_json::Value as JsonValue;
811
use std::collections::HashMap;
912
use std::sync::OnceLock;
@@ -83,10 +86,40 @@ fn default_auto_configure() -> bool {
8386
true
8487
}
8588

86-
#[derive(Debug, Default, Deserialize, Serialize, Validate)]
89+
#[derive(Debug, Deserialize, Serialize, Validate)]
8790
pub struct NextJs {
8891
#[serde(default)]
89-
pub rewrite_urls: bool,
92+
pub enabled: bool,
93+
#[serde(
94+
default = "default_nextjs_attributes",
95+
deserialize_with = "deserialize_nextjs_attributes"
96+
)]
97+
pub rewrite_attributes: Vec<String>,
98+
}
99+
100+
fn default_nextjs_attributes() -> Vec<String> {
101+
vec!["href".to_string(), "link".to_string(), "url".to_string()]
102+
}
103+
104+
impl Default for NextJs {
105+
fn default() -> Self {
106+
Self {
107+
enabled: false,
108+
rewrite_attributes: default_nextjs_attributes(),
109+
}
110+
}
111+
}
112+
113+
fn deserialize_nextjs_attributes<'de, D>(deserializer: D) -> Result<Vec<String>, D::Error>
114+
where
115+
D: Deserializer<'de>,
116+
{
117+
let value = Option::<JsonValue>::deserialize(deserializer)?;
118+
match value {
119+
Some(json) => vec_from_seq_or_map(json.into_deserializer())
120+
.map_err(<D::Error as serde::de::Error>::custom),
121+
None => Ok(default_nextjs_attributes()),
122+
}
90123
}
91124

92125
#[allow(unused)]
@@ -303,9 +336,14 @@ mod tests {
303336

304337
assert!(!settings.prebid.server_url.is_empty());
305338
assert!(
306-
!settings.publisher.nextjs.rewrite_urls,
339+
!settings.publisher.nextjs.enabled,
307340
"Next.js URL rewriting should default to disabled"
308341
);
342+
assert_eq!(
343+
settings.publisher.nextjs.rewrite_attributes,
344+
vec!["href".to_string(), "link".to_string(), "url".to_string()],
345+
"Next.js rewrite attributes should default to href/link/url"
346+
);
309347

310348
assert!(!settings.synthetic.counter_store.is_empty());
311349
assert!(!settings.synthetic.opid_store.is_empty());
@@ -326,9 +364,14 @@ mod tests {
326364
"https://test-prebid.com/openrtb2/auction"
327365
);
328366
assert!(
329-
!settings.publisher.nextjs.rewrite_urls,
367+
!settings.publisher.nextjs.enabled,
330368
"Next.js URL rewriting should default to disabled"
331369
);
370+
assert_eq!(
371+
settings.publisher.nextjs.rewrite_attributes,
372+
vec!["href".to_string(), "link".to_string(), "url".to_string()],
373+
"Next.js rewrite attributes should default to href/link/url"
374+
);
332375
assert_eq!(settings.publisher.domain, "test-publisher.com");
333376
assert_eq!(settings.publisher.cookie_domain, ".test-publisher.com");
334377
assert_eq!(

trusted-server.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@ origin_url = "https://origin.test-publisher.com"
1010
proxy_secret = "change-me-proxy-secret"
1111

1212
[publisher.nextjs]
13-
# When true, rewrite Next.js JSON payload URLs to the request host
14-
rewrite_urls = false
13+
enabled = false
14+
rewrite_attributes = ["href", "link", "url"]
1515

1616
[prebid]
1717
server_url = "http://68.183.113.79:8000"

0 commit comments

Comments
 (0)