Skip to content

Commit 2d40b79

Browse files
committed
Merge branch 'feature/refactor-nextjs-integration' into feature/integration-proxy
2 parents c109ba3 + c9f9ab6 commit 2d40b79

File tree

14 files changed

+1417
-1349
lines changed

14 files changed

+1417
-1349
lines changed

crates/common/src/ad.rs

Lines changed: 0 additions & 435 deletions
This file was deleted.

crates/common/src/html_processor.rs

Lines changed: 197 additions & 219 deletions
Large diffs are not rendered by default.

crates/common/src/integrations/mod.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
33
use crate::settings::Settings;
44

5+
pub mod nextjs;
6+
pub mod prebid;
57
mod registry;
68
pub mod testlight;
79

@@ -15,5 +17,5 @@ pub use registry::{
1517
type IntegrationBuilder = fn(&Settings) -> Option<IntegrationRegistration>;
1618

1719
pub(crate) fn builders() -> &'static [IntegrationBuilder] {
18-
&[testlight::register]
20+
&[prebid::register, testlight::register, nextjs::register]
1921
}
Lines changed: 244 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,244 @@
1+
use std::sync::Arc;
2+
3+
use regex::{escape, Regex};
4+
use serde::{Deserialize, Serialize};
5+
use validator::Validate;
6+
7+
use crate::integrations::{
8+
IntegrationRegistration, IntegrationScriptContext, IntegrationScriptRewriter,
9+
ScriptRewriteAction,
10+
};
11+
use crate::settings::{IntegrationConfig, Settings};
12+
13+
/// Identifier of the Next.js integration: used to look up its settings, to name its
/// registration, and as the id reported by its script rewriters.
const NEXTJS_INTEGRATION_ID: &str = "nextjs";
14+
15+
/// Settings for the Next.js integration.
#[derive(Debug, Clone, Deserialize, Serialize, Validate)]
16+
pub struct NextJsIntegrationConfig {
17+
/// Whether the integration is enabled; `default_enabled()` (false) is used when omitted.
#[serde(default = "default_enabled")]
18+
pub enabled: bool,
19+
/// Names of the keys whose URL values are rewritten. `default_rewrite_attributes()` is used
/// when omitted, and validation requires at least one entry.
#[serde(
20+
default = "default_rewrite_attributes",
21+
deserialize_with = "crate::settings::vec_from_seq_or_map"
22+
)]
23+
#[validate(length(min = 1))]
24+
pub rewrite_attributes: Vec<String>,
25+
}
26+
27+
// Exposes the `enabled` flag through the shared `IntegrationConfig` trait.
impl IntegrationConfig for NextJsIntegrationConfig {
28+
// Reports the value of the `enabled` field as-is.
fn is_enabled(&self) -> bool {
29+
self.enabled
30+
}
31+
}
32+
33+
/// Serde default for `enabled`: the integration is off unless the settings turn it on.
fn default_enabled() -> bool {
34+
false
35+
}
36+
37+
/// Serde default for `rewrite_attributes`: the keys `href`, `link` and `url`, in that order.
fn default_rewrite_attributes() -> Vec<String> {
    ["href", "link", "url"]
        .iter()
        .map(|name| String::from(*name))
        .collect()
}
40+
41+
pub fn register(settings: &Settings) -> Option<IntegrationRegistration> {
42+
let config = build(settings)?;
43+
let structured = Arc::new(NextJsScriptRewriter::new(
44+
Arc::clone(&config),
45+
NextJsRewriteMode::Structured,
46+
));
47+
let streamed = Arc::new(NextJsScriptRewriter::new(
48+
config,
49+
NextJsRewriteMode::Streamed,
50+
));
51+
52+
Some(
53+
IntegrationRegistration::builder(NEXTJS_INTEGRATION_ID)
54+
.with_script_rewriter(structured)
55+
.with_script_rewriter(streamed)
56+
.build(),
57+
)
58+
}
59+
60+
fn build(settings: &Settings) -> Option<Arc<NextJsIntegrationConfig>> {
61+
let config = settings
62+
.integration_config::<NextJsIntegrationConfig>(NEXTJS_INTEGRATION_ID)
63+
.ok()
64+
.flatten()?;
65+
Some(Arc::new(config))
66+
}
67+
68+
/// Selects which kind of script element a `NextJsScriptRewriter` handles.
#[derive(Clone, Copy)]
69+
enum NextJsRewriteMode {
70+
/// The element matched by the `script#__NEXT_DATA__` selector.
Structured,
71+
/// Any `script` element whose content contains `self.__next_f`.
Streamed,
72+
}
73+
74+
/// Script rewriter for Next.js payloads, operating in a single `NextJsRewriteMode`.
struct NextJsScriptRewriter {
75+
/// Shared integration settings; supplies the `rewrite_attributes` list.
config: Arc<NextJsIntegrationConfig>,
76+
/// Which kind of script element this instance handles.
mode: NextJsRewriteMode,
77+
}
78+
79+
impl NextJsScriptRewriter {
80+
fn new(config: Arc<NextJsIntegrationConfig>, mode: NextJsRewriteMode) -> Self {
81+
Self { config, mode }
82+
}
83+
84+
fn rewrite_values(
85+
&self,
86+
content: &str,
87+
ctx: &IntegrationScriptContext<'_>,
88+
) -> ScriptRewriteAction {
89+
if let Some(rewritten) = rewrite_nextjs_values(
90+
content,
91+
ctx.origin_host,
92+
ctx.request_host,
93+
ctx.request_scheme,
94+
&self.config.rewrite_attributes,
95+
) {
96+
ScriptRewriteAction::replace(rewritten)
97+
} else {
98+
ScriptRewriteAction::keep()
99+
}
100+
}
101+
}
102+
103+
impl IntegrationScriptRewriter for NextJsScriptRewriter {
104+
fn integration_id(&self) -> &'static str {
105+
NEXTJS_INTEGRATION_ID
106+
}
107+
108+
fn selector(&self) -> &'static str {
109+
match self.mode {
110+
NextJsRewriteMode::Structured => "script#__NEXT_DATA__",
111+
NextJsRewriteMode::Streamed => "script",
112+
}
113+
}
114+
115+
fn rewrite(&self, content: &str, ctx: &IntegrationScriptContext<'_>) -> ScriptRewriteAction {
116+
if self.config.rewrite_attributes.is_empty() {
117+
return ScriptRewriteAction::keep();
118+
}
119+
120+
match self.mode {
121+
NextJsRewriteMode::Structured => self.rewrite_values(content, ctx),
122+
NextJsRewriteMode::Streamed => {
123+
if !content.contains("self.__next_f") {
124+
return ScriptRewriteAction::keep();
125+
}
126+
self.rewrite_values(content, ctx)
127+
}
128+
}
129+
}
130+
}
131+
132+
fn rewrite_nextjs_values(
133+
content: &str,
134+
origin_host: &str,
135+
request_host: &str,
136+
request_scheme: &str,
137+
attributes: &[String],
138+
) -> Option<String> {
139+
if origin_host.is_empty() || request_host.is_empty() || attributes.is_empty() {
140+
return None;
141+
}
142+
143+
let mut rewritten = content.to_string();
144+
let mut changed = false;
145+
let escaped_origin = escape(origin_host);
146+
let replacement_scheme = format!("{}://{}", request_scheme, request_host);
147+
148+
for attribute in attributes {
149+
let escaped_attr = escape(attribute);
150+
let pattern = format!(
151+
r#"(?P<prefix>(?:\\*")?{attr}(?:\\*")?:\\*")(?P<scheme>https?://|//){origin}"#,
152+
attr = escaped_attr,
153+
origin = escaped_origin,
154+
);
155+
let regex = Regex::new(&pattern).expect("valid Next.js rewrite regex");
156+
let next_value = regex.replace_all(&rewritten, |caps: &regex::Captures<'_>| {
157+
let scheme = &caps["scheme"];
158+
let replacement = if scheme == "//" {
159+
format!("//{}", request_host)
160+
} else {
161+
replacement_scheme.clone()
162+
};
163+
format!("{}{}", &caps["prefix"], replacement)
164+
});
165+
if next_value != rewritten {
166+
changed = true;
167+
rewritten = next_value.into_owned();
168+
}
169+
}
170+
171+
changed.then_some(rewritten)
172+
}
173+
174+
#[cfg(test)]
mod tests {
    use super::*;
    use crate::integrations::{IntegrationScriptContext, ScriptRewriteAction};

    /// Enabled configuration covering the three default attribute names.
    fn test_config() -> Arc<NextJsIntegrationConfig> {
        let rewrite_attributes = vec!["href".into(), "link".into(), "url".into()];
        Arc::new(NextJsIntegrationConfig {
            enabled: true,
            rewrite_attributes,
        })
    }

    /// Script context proxying `origin.example.com` through `https://ts.example.com`.
    fn ctx(selector: &'static str) -> IntegrationScriptContext<'static> {
        IntegrationScriptContext {
            selector,
            request_host: "ts.example.com",
            request_scheme: "https",
            origin_host: "origin.example.com",
        }
    }

    /// Only values under configured keys are rewritten; other keys keep the origin host.
    #[test]
    fn structured_rewriter_updates_next_data_payload() {
        let payload = r#"{"props":{"pageProps":{"primary":{"href":"https://origin.example.com/reviews"},"secondary":{"href":"http://origin.example.com/sign-in"},"fallbackHref":"http://origin.example.com/legacy","protoRelative":"//origin.example.com/assets/logo.png"}}}"#;
        let rewriter = NextJsScriptRewriter::new(test_config(), NextJsRewriteMode::Structured);

        let result = rewriter.rewrite(payload, &ctx("script#__NEXT_DATA__"));

        if let ScriptRewriteAction::Replace(value) = result {
            assert!(value.contains(r#""href":"https://ts.example.com/reviews""#));
            assert!(value.contains(r#""href":"https://ts.example.com/sign-in""#));
            assert!(value.contains(r#""fallbackHref":"http://origin.example.com/legacy""#));
            assert!(value.contains(r#""protoRelative":"//origin.example.com/assets/logo.png""#));
        } else {
            panic!("Expected rewrite to update payload");
        }
    }

    /// Streamed mode ignores ordinary scripts and rewrites `self.__next_f` chunks.
    #[test]
    fn streamed_rewriter_only_runs_for_next_payloads() {
        let rewriter = NextJsScriptRewriter::new(test_config(), NextJsRewriteMode::Streamed);

        let untouched = rewriter.rewrite("console.log('hello');", &ctx("script"));
        assert!(matches!(untouched, ScriptRewriteAction::Keep));

        let payload = r#"self.__next_f.push(["chunk", "{\"href\":\"https://origin.example.com/app\"}"]);
"#;
        let outcome = rewriter.rewrite(payload, &ctx("script"));

        if let ScriptRewriteAction::Replace(value) = outcome {
            assert!(value.contains(r#"https://ts.example.com/app"#));
        } else {
            panic!("Expected streamed payload rewrite");
        }
    }

    /// Protocol-relative URLs keep their `//` form and only have the host swapped.
    #[test]
    fn rewrite_helper_handles_protocol_relative_urls() {
        let content = r#"{"props":{"pageProps":{"link":"//origin.example.com/image.png"}}}"#;
        let attributes = ["link".into()];

        let rewritten = rewrite_nextjs_values(
            content,
            "origin.example.com",
            "ts.example.com",
            "https",
            &attributes,
        )
        .expect("should rewrite protocol relative link");

        assert!(rewritten.contains(r#""link":"//ts.example.com/image.png""#));
    }
}

0 commit comments

Comments
 (0)