Skip to content

Commit 76cf1fb

Browse files
serde strategy
1 parent c925426 commit 76cf1fb

File tree

1 file changed

+134
-127
lines changed

1 file changed

+134
-127
lines changed

crates/common/src/integrations/nextjs.rs

Lines changed: 134 additions & 127 deletions
Original file line numberDiff line numberDiff line change
@@ -140,94 +140,129 @@ impl IntegrationScriptRewriter for NextJsScriptRewriter {
140140
}
141141
}
142142

143-
/// Recursively removes prebid script/link elements from Next.js JSON payloads
144-
/// Handles both array elements and objects, preserving init code
145-
fn clean_prebid_from_json(value: &mut JsonValue, remove_prebid: bool) -> bool {
143+
/// Extract Next.js streaming payload components
144+
/// Returns: (prefix, chunk_id, json_str_escaped)
145+
fn extract_nextjs_streaming_payload(content: &str) -> Option<(String, String, String)> {
146+
let pattern = Regex::new(r#"(self\.__next_[fs]\.push\(\[)(\d+),"(.+)"\]\)"#).ok()?;
147+
let caps = pattern.captures(content)?;
148+
149+
Some((
150+
caps.get(1)?.as_str().to_string(),
151+
caps.get(2)?.as_str().to_string(),
152+
caps.get(3)?.as_str().to_string(),
153+
))
154+
}
155+
156+
/// Check if a JSON value contains prebid file references
157+
fn has_prebid_reference(value: &JsonValue) -> bool {
158+
match value {
159+
JsonValue::String(s) => s.contains("prebid") && s.contains(".js"),
160+
JsonValue::Object(obj) => obj.values().any(|v| has_prebid_reference(v)),
161+
JsonValue::Array(arr) => arr.iter().any(|v| has_prebid_reference(v)),
162+
_ => false,
163+
}
164+
}
165+
166+
/// Check if props contain pbjs initialization code
167+
fn has_init_code(props: &JsonValue) -> bool {
168+
if let JsonValue::Object(obj) = props {
169+
if let Some(JsonValue::String(children)) = obj.get("children") {
170+
return children.contains("pbjs=pbjs||{}") || children.contains("pbjs.que");
171+
}
172+
}
173+
false
174+
}
175+
176+
/// Process a React element, returning true if it should be kept
177+
/// Modifies the element in place to remove prebid attributes
178+
fn process_react_element(elem: &mut JsonValue, remove_prebid: bool) -> bool {
146179
if !remove_prebid {
147-
return false;
180+
return true;
148181
}
149182

150-
let mut changed = false;
183+
// Check if this is a React element: ["$", "element_type", {props}]
184+
if let JsonValue::Array(arr) = elem {
185+
if arr.len() >= 3 {
186+
// Check for ["$", ...] pattern
187+
if let Some(JsonValue::String(dollar)) = arr.get(0) {
188+
if dollar != "$" {
189+
return true;
190+
}
191+
}
151192

152-
match value {
153-
JsonValue::Array(arr) => {
154-
// Check if this is a React element array like ["$", "script", {...}]
155-
if arr.len() >= 3 {
156-
if let (Some(JsonValue::String(dollar)), Some(JsonValue::String(elem_type))) =
157-
(arr.get(0), arr.get(1))
158-
{
159-
if dollar == "$" && (elem_type == "script" || elem_type == "link") {
160-
// Check if the props object (index 2) contains prebid
161-
if let Some(JsonValue::Object(props)) = arr.get(2) {
162-
let has_prebid = props.values().any(|v| {
163-
v.as_str()
164-
.map_or(false, |s| s.contains("prebid") && s.contains(".js"))
165-
});
166-
167-
if has_prebid {
168-
// For links, remove entirely by returning marker
169-
if elem_type == "link" {
170-
*value = JsonValue::Null;
171-
return true;
172-
}
173-
174-
// For scripts, check if it has init code
175-
let has_init = props
176-
.get("children")
177-
.and_then(|v| v.as_str())
178-
.map_or(false, |s| {
179-
s.contains("pbjs=pbjs||{}") || s.contains("pbjs.que")
180-
});
181-
182-
if has_init {
183-
// Keep script but remove src/href attributes
184-
if let Some(JsonValue::Object(props_mut)) = arr.get_mut(2) {
185-
changed = props_mut.remove("src").is_some() || changed;
186-
changed = props_mut.remove("href").is_some() || changed;
187-
}
188-
} else {
189-
// No init code, remove entirely
190-
*value = JsonValue::Null;
191-
return true;
192-
}
193+
// Get element type (clone to avoid borrow issues)
194+
let elem_type = arr.get(1).and_then(|v| v.as_str()).map(|s| s.to_string());
195+
196+
if let Some(elem_type) = elem_type {
197+
// Now we can get mutable reference to props
198+
if let Some(props) = arr.get_mut(2) {
199+
if !has_prebid_reference(props) {
200+
return true; // No prebid, keep as-is
201+
}
202+
203+
// Has prebid reference
204+
if elem_type == "link" {
205+
return false; // Remove link elements entirely
206+
}
207+
208+
if elem_type == "script" {
209+
if has_init_code(props) {
210+
// Keep script but remove src/href
211+
if let JsonValue::Object(obj) = props {
212+
obj.remove("src");
213+
obj.remove("href");
193214
}
215+
return true;
216+
} else {
217+
return false; // Remove script without init
194218
}
195219
}
196220
}
197221
}
222+
}
223+
}
198224

199-
// Recursively process array elements
200-
for item in arr.iter_mut() {
201-
changed = clean_prebid_from_json(item, remove_prebid) || changed;
202-
}
225+
true // Keep by default
226+
}
203227

204-
// Remove null markers
205-
arr.retain(|v| !v.is_null());
206-
}
207-
JsonValue::Object(obj) => {
208-
// Check if this is a Next.js streaming object like {children: "...", id: "..."}
209-
if let Some(JsonValue::String(children)) = obj.get("children") {
210-
if children.contains("prebid") && children.contains(".js") {
211-
let has_init =
212-
children.contains("pbjs=pbjs||{}") || children.contains("pbjs.que");
213-
214-
if !has_init {
215-
// Remove entire object by marking as null
216-
*value = JsonValue::Null;
217-
return true;
218-
}
219-
}
220-
}
228+
/// Clean Next.js streaming payloads using proper JSON parsing
229+
/// Handles: self.__next_f.push([id, "JSON_STRING"])
230+
fn clean_nextjs_streaming_payload(content: &str, remove_prebid: bool) -> Option<String> {
231+
// Extract payload components
232+
let (prefix, chunk_id, json_escaped) = extract_nextjs_streaming_payload(content)?;
221233

222-
// Recursively process object values
223-
for val in obj.values_mut() {
224-
changed = clean_prebid_from_json(val, remove_prebid) || changed;
225-
}
226-
}
227-
_ => {}
234+
// Unescape the JSON string
235+
let json_str = json_escaped.replace(r#"\""#, "\"");
236+
237+
// The JSON string contains comma-separated React elements, not a single valid JSON
238+
// Wrap it in array brackets to parse
239+
let json_with_brackets = format!("[{}]", json_str);
240+
241+
// Parse into Vec<JsonValue>
242+
let mut elements: Vec<JsonValue> = serde_json::from_str(&json_with_brackets).ok()?;
243+
244+
// Filter and transform elements
245+
elements.retain_mut(|elem| process_react_element(elem, remove_prebid));
246+
247+
// If all elements were removed, we still need valid JSON
248+
if elements.is_empty() {
249+
return Some(format!(r#"{}{},"[]"]);"#, prefix, chunk_id));
228250
}
229251

230-
changed
252+
// Serialize back to JSON
253+
let json_array = serde_json::to_string(&elements).ok()?;
254+
255+
// Remove outer array brackets
256+
let json_str_rebuilt = &json_array[1..json_array.len() - 1];
257+
258+
// Re-escape quotes
259+
let json_escaped_rebuilt = json_str_rebuilt.replace('"', r#"\""#);
260+
261+
// Reconstruct the full JavaScript
262+
Some(format!(
263+
r#"{}{},"{}"]);"#,
264+
prefix, chunk_id, json_escaped_rebuilt
265+
))
231266
}
232267

233268
fn rewrite_nextjs_values(
@@ -273,67 +308,39 @@ fn rewrite_nextjs_values(
273308

274309
// Remove prebid-related elements from Next.js payloads
275310
if remove_prebid && rewritten.contains("prebid") && rewritten.contains(".js") {
276-
// Strategy: Use targeted regex patterns to handle different cases
311+
let mut handled = false;
277312

278-
// Pattern 1: Remove complete link elements to prebid
279-
// Matches: [\"$\",\"link\",{\"href\":\"/js/prebid.min.js\"}]
280-
if let Ok(link_pattern) = Regex::new(
281-
r#",?\s*\[\\*"\$\\*",\\*"link\\*",\{[^\}]*?\\*"(?:href|src)\\*":\s*\\*"[^"]*?prebid[^"]*?\.js[^"]*?\\*"[^\}]*?\}\]"#,
282-
) {
283-
let new_value = link_pattern.replace_all(&rewritten, "");
284-
if new_value != rewritten {
313+
// Try serde-based approach for streaming payloads: self.__next_f.push([id, "JSON_STRING"])
314+
if rewritten.contains("__next_f") || rewritten.contains("__next_s") {
315+
if let Some(cleaned) = clean_nextjs_streaming_payload(&rewritten, true) {
285316
changed = true;
286-
rewritten = new_value.into_owned();
317+
rewritten = cleaned;
318+
handled = true;
287319
}
288320
}
289321

290-
// Pattern 2: Remove src/href attributes from script elements that have init code
291-
// This preserves the script element but removes just the problematic attribute
292-
if let Ok(src_attr_pattern) =
293-
Regex::new(r#",?\\*"(?:src|href)\\*":\s*\\*"[^"]*?prebid[^"]*?\.js[^"]*?\\*""#)
294-
{
295-
let new_value = src_attr_pattern.replace_all(&rewritten, "");
296-
if new_value != rewritten {
297-
changed = true;
298-
rewritten = new_value.into_owned();
299-
}
300-
}
301-
302-
// Pattern 3: Remove __next_s style elements without init code
303-
// [0,{"children":"...s.src='/js/prebid.min.js'..."}]
304-
if let Ok(next_s_pattern) =
305-
Regex::new(r#",?\s*\[\d+\s*,\s*\{[^\]]*?prebid[^\]]*?\.js[^\]]*?\}\]"#)
306-
{
307-
let new_value = next_s_pattern.replace_all(&rewritten, |caps: &regex::Captures<'_>| {
308-
let matched = &caps[0];
309-
// Only remove if it doesn't contain init code
310-
if matched.contains("pbjs=pbjs||{}") || matched.contains("pbjs.que") {
311-
matched.to_string()
312-
} else {
313-
String::new()
322+
// Fallback: Handle [0, {...}] style payloads (autoblog.com pattern)
323+
// Also runs if serde approach failed
324+
if !handled {
325+
if let Ok(next_s_pattern) =
326+
Regex::new(r#"\[\d+\s*,\s*\{[^\]]*?prebid[^\]]*?\.js[^\]]*?\}\],?"#)
327+
{
328+
let new_value =
329+
next_s_pattern.replace_all(&rewritten, |caps: &regex::Captures<'_>| {
330+
let matched = &caps[0];
331+
// Only remove if it doesn't contain init code
332+
if matched.contains("pbjs=pbjs||{}") || matched.contains("pbjs.que") {
333+
matched.to_string()
334+
} else {
335+
String::new()
336+
}
337+
});
338+
if new_value != rewritten {
339+
changed = true;
340+
rewritten = new_value.into_owned();
314341
}
315-
});
316-
if new_value != rewritten {
317-
changed = true;
318-
rewritten = new_value.into_owned();
319342
}
320343
}
321-
322-
// Clean up any resulting malformed JSON
323-
rewritten = rewritten
324-
.replace(",]", "]")
325-
.replace("[,", "[")
326-
.replace(",,", ",")
327-
.replace("{,", "{")
328-
.replace(",}", "}");
329-
330-
// Clean up leading commas in JSON strings (inside quotes)
331-
// Pattern: ",[ --> "[
332-
if let Ok(leading_comma_in_string) = Regex::new(r#"",\["#) {
333-
rewritten = leading_comma_in_string
334-
.replace_all(&rewritten, r#""["#)
335-
.into_owned();
336-
}
337344
}
338345

339346
changed.then_some(rewritten)

0 commit comments

Comments
 (0)