@@ -140,94 +140,129 @@ impl IntegrationScriptRewriter for NextJsScriptRewriter {
140140 }
141141}
142142
143- /// Recursively removes prebid script/link elements from Next.js JSON payloads
144- /// Handles both array elements and objects, preserving init code
145- fn clean_prebid_from_json ( value : & mut JsonValue , remove_prebid : bool ) -> bool {
143+ /// Extract Next.js streaming payload components
144+ /// Returns: (prefix, chunk_id, json_str_escaped)
145+ fn extract_nextjs_streaming_payload ( content : & str ) -> Option < ( String , String , String ) > {
146+ let pattern = Regex :: new ( r#"(self\.__next_[fs]\.push\(\[)(\d+),"(.+)"\]\)"# ) . ok ( ) ?;
147+ let caps = pattern. captures ( content) ?;
148+
149+ Some ( (
150+ caps. get ( 1 ) ?. as_str ( ) . to_string ( ) ,
151+ caps. get ( 2 ) ?. as_str ( ) . to_string ( ) ,
152+ caps. get ( 3 ) ?. as_str ( ) . to_string ( ) ,
153+ ) )
154+ }
155+
156+ /// Check if a JSON value contains prebid file references
157+ fn has_prebid_reference ( value : & JsonValue ) -> bool {
158+ match value {
159+ JsonValue :: String ( s) => s. contains ( "prebid" ) && s. contains ( ".js" ) ,
160+ JsonValue :: Object ( obj) => obj. values ( ) . any ( |v| has_prebid_reference ( v) ) ,
161+ JsonValue :: Array ( arr) => arr. iter ( ) . any ( |v| has_prebid_reference ( v) ) ,
162+ _ => false ,
163+ }
164+ }
165+
166+ /// Check if props contain pbjs initialization code
167+ fn has_init_code ( props : & JsonValue ) -> bool {
168+ if let JsonValue :: Object ( obj) = props {
169+ if let Some ( JsonValue :: String ( children) ) = obj. get ( "children" ) {
170+ return children. contains ( "pbjs=pbjs||{}" ) || children. contains ( "pbjs.que" ) ;
171+ }
172+ }
173+ false
174+ }
175+
176+ /// Process a React element, returning true if it should be kept
177+ /// Modifies the element in place to remove prebid attributes
178+ fn process_react_element ( elem : & mut JsonValue , remove_prebid : bool ) -> bool {
146179 if !remove_prebid {
147- return false ;
180+ return true ;
148181 }
149182
150- let mut changed = false ;
183+ // Check if this is a React element: ["$", "element_type", {props}]
184+ if let JsonValue :: Array ( arr) = elem {
185+ if arr. len ( ) >= 3 {
186+ // Check for ["$", ...] pattern
187+ if let Some ( JsonValue :: String ( dollar) ) = arr. get ( 0 ) {
188+ if dollar != "$" {
189+ return true ;
190+ }
191+ }
151192
152- match value {
153- JsonValue :: Array ( arr) => {
154- // Check if this is a React element array like ["$", "script", {...}]
155- if arr. len ( ) >= 3 {
156- if let ( Some ( JsonValue :: String ( dollar) ) , Some ( JsonValue :: String ( elem_type) ) ) =
157- ( arr. get ( 0 ) , arr. get ( 1 ) )
158- {
159- if dollar == "$" && ( elem_type == "script" || elem_type == "link" ) {
160- // Check if the props object (index 2) contains prebid
161- if let Some ( JsonValue :: Object ( props) ) = arr. get ( 2 ) {
162- let has_prebid = props. values ( ) . any ( |v| {
163- v. as_str ( )
164- . map_or ( false , |s| s. contains ( "prebid" ) && s. contains ( ".js" ) )
165- } ) ;
166-
167- if has_prebid {
168- // For links, remove entirely by returning marker
169- if elem_type == "link" {
170- * value = JsonValue :: Null ;
171- return true ;
172- }
173-
174- // For scripts, check if it has init code
175- let has_init = props
176- . get ( "children" )
177- . and_then ( |v| v. as_str ( ) )
178- . map_or ( false , |s| {
179- s. contains ( "pbjs=pbjs||{}" ) || s. contains ( "pbjs.que" )
180- } ) ;
181-
182- if has_init {
183- // Keep script but remove src/href attributes
184- if let Some ( JsonValue :: Object ( props_mut) ) = arr. get_mut ( 2 ) {
185- changed = props_mut. remove ( "src" ) . is_some ( ) || changed;
186- changed = props_mut. remove ( "href" ) . is_some ( ) || changed;
187- }
188- } else {
189- // No init code, remove entirely
190- * value = JsonValue :: Null ;
191- return true ;
192- }
193+ // Get element type (clone to avoid borrow issues)
194+ let elem_type = arr. get ( 1 ) . and_then ( |v| v. as_str ( ) ) . map ( |s| s. to_string ( ) ) ;
195+
196+ if let Some ( elem_type) = elem_type {
197+ // Now we can get mutable reference to props
198+ if let Some ( props) = arr. get_mut ( 2 ) {
199+ if !has_prebid_reference ( props) {
200+ return true ; // No prebid, keep as-is
201+ }
202+
203+ // Has prebid reference
204+ if elem_type == "link" {
205+ return false ; // Remove link elements entirely
206+ }
207+
208+ if elem_type == "script" {
209+ if has_init_code ( props) {
210+ // Keep script but remove src/href
211+ if let JsonValue :: Object ( obj) = props {
212+ obj. remove ( "src" ) ;
213+ obj. remove ( "href" ) ;
193214 }
215+ return true ;
216+ } else {
217+ return false ; // Remove script without init
194218 }
195219 }
196220 }
197221 }
222+ }
223+ }
198224
199- // Recursively process array elements
200- for item in arr. iter_mut ( ) {
201- changed = clean_prebid_from_json ( item, remove_prebid) || changed;
202- }
225+ true // Keep by default
226+ }
203227
204- // Remove null markers
205- arr. retain ( |v| !v. is_null ( ) ) ;
206- }
207- JsonValue :: Object ( obj) => {
208- // Check if this is a Next.js streaming object like {children: "...", id: "..."}
209- if let Some ( JsonValue :: String ( children) ) = obj. get ( "children" ) {
210- if children. contains ( "prebid" ) && children. contains ( ".js" ) {
211- let has_init =
212- children. contains ( "pbjs=pbjs||{}" ) || children. contains ( "pbjs.que" ) ;
213-
214- if !has_init {
215- // Remove entire object by marking as null
216- * value = JsonValue :: Null ;
217- return true ;
218- }
219- }
220- }
228+ /// Clean Next.js streaming payloads using proper JSON parsing
229+ /// Handles: self.__next_f.push([id, "JSON_STRING"])
230+ fn clean_nextjs_streaming_payload ( content : & str , remove_prebid : bool ) -> Option < String > {
231+ // Extract payload components
232+ let ( prefix, chunk_id, json_escaped) = extract_nextjs_streaming_payload ( content) ?;
221233
222- // Recursively process object values
223- for val in obj. values_mut ( ) {
224- changed = clean_prebid_from_json ( val, remove_prebid) || changed;
225- }
226- }
227- _ => { }
234+ // Unescape the JSON string
235+ let json_str = json_escaped. replace ( r#"\""# , "\" " ) ;
236+
237+ // The JSON string contains comma-separated React elements, not a single valid JSON
238+ // Wrap it in array brackets to parse
239+ let json_with_brackets = format ! ( "[{}]" , json_str) ;
240+
241+ // Parse into Vec<JsonValue>
242+ let mut elements: Vec < JsonValue > = serde_json:: from_str ( & json_with_brackets) . ok ( ) ?;
243+
244+ // Filter and transform elements
245+ elements. retain_mut ( |elem| process_react_element ( elem, remove_prebid) ) ;
246+
247+ // If all elements were removed, we still need valid JSON
248+ if elements. is_empty ( ) {
249+ return Some ( format ! ( r#"{}{},"[]"]);"# , prefix, chunk_id) ) ;
228250 }
229251
230- changed
252+ // Serialize back to JSON
253+ let json_array = serde_json:: to_string ( & elements) . ok ( ) ?;
254+
255+ // Remove outer array brackets
256+ let json_str_rebuilt = & json_array[ 1 ..json_array. len ( ) - 1 ] ;
257+
258+ // Re-escape quotes
259+ let json_escaped_rebuilt = json_str_rebuilt. replace ( '"' , r#"\""# ) ;
260+
261+ // Reconstruct the full JavaScript
262+ Some ( format ! (
263+ r#"{}{},"{}"]);"# ,
264+ prefix, chunk_id, json_escaped_rebuilt
265+ ) )
231266}
232267
233268fn rewrite_nextjs_values (
@@ -273,67 +308,39 @@ fn rewrite_nextjs_values(
273308
274309 // Remove prebid-related elements from Next.js payloads
275310 if remove_prebid && rewritten. contains ( "prebid" ) && rewritten. contains ( ".js" ) {
276- // Strategy: Use targeted regex patterns to handle different cases
311+ let mut handled = false ;
277312
278- // Pattern 1: Remove complete link elements to prebid
279- // Matches: [\"$\",\"link\",{\"href\":\"/js/prebid.min.js\"}]
280- if let Ok ( link_pattern) = Regex :: new (
281- r#",?\s*\[\\*"\$\\*",\\*"link\\*",\{[^\}]*?\\*"(?:href|src)\\*":\s*\\*"[^"]*?prebid[^"]*?\.js[^"]*?\\*"[^\}]*?\}\]"# ,
282- ) {
283- let new_value = link_pattern. replace_all ( & rewritten, "" ) ;
284- if new_value != rewritten {
313+ // Try serde-based approach for streaming payloads: self.__next_f.push([id, "JSON_STRING"])
314+ if rewritten. contains ( "__next_f" ) || rewritten. contains ( "__next_s" ) {
315+ if let Some ( cleaned) = clean_nextjs_streaming_payload ( & rewritten, true ) {
285316 changed = true ;
286- rewritten = new_value. into_owned ( ) ;
317+ rewritten = cleaned;
318+ handled = true ;
287319 }
288320 }
289321
290- // Pattern 2: Remove src/href attributes from script elements that have init code
291- // This preserves the script element but removes just the problematic attribute
292- if let Ok ( src_attr_pattern) =
293- Regex :: new ( r#",?\\*"(?:src|href)\\*":\s*\\*"[^"]*?prebid[^"]*?\.js[^"]*?\\*""# )
294- {
295- let new_value = src_attr_pattern. replace_all ( & rewritten, "" ) ;
296- if new_value != rewritten {
297- changed = true ;
298- rewritten = new_value. into_owned ( ) ;
299- }
300- }
301-
302- // Pattern 3: Remove __next_s style elements without init code
303- // [0,{"children":"...s.src='/js/prebid.min.js'..."}]
304- if let Ok ( next_s_pattern) =
305- Regex :: new ( r#",?\s*\[\d+\s*,\s*\{[^\]]*?prebid[^\]]*?\.js[^\]]*?\}\]"# )
306- {
307- let new_value = next_s_pattern. replace_all ( & rewritten, |caps : & regex:: Captures < ' _ > | {
308- let matched = & caps[ 0 ] ;
309- // Only remove if it doesn't contain init code
310- if matched. contains ( "pbjs=pbjs||{}" ) || matched. contains ( "pbjs.que" ) {
311- matched. to_string ( )
312- } else {
313- String :: new ( )
322+ // Fallback: Handle [0, {...}] style payloads (autoblog.com pattern)
323+ // Also runs if serde approach failed
324+ if !handled {
325+ if let Ok ( next_s_pattern) =
326+ Regex :: new ( r#"\[\d+\s*,\s*\{[^\]]*?prebid[^\]]*?\.js[^\]]*?\}\],?"# )
327+ {
328+ let new_value =
329+ next_s_pattern. replace_all ( & rewritten, |caps : & regex:: Captures < ' _ > | {
330+ let matched = & caps[ 0 ] ;
331+ // Only remove if it doesn't contain init code
332+ if matched. contains ( "pbjs=pbjs||{}" ) || matched. contains ( "pbjs.que" ) {
333+ matched. to_string ( )
334+ } else {
335+ String :: new ( )
336+ }
337+ } ) ;
338+ if new_value != rewritten {
339+ changed = true ;
340+ rewritten = new_value. into_owned ( ) ;
314341 }
315- } ) ;
316- if new_value != rewritten {
317- changed = true ;
318- rewritten = new_value. into_owned ( ) ;
319342 }
320343 }
321-
322- // Clean up any resulting malformed JSON
323- rewritten = rewritten
324- . replace ( ",]" , "]" )
325- . replace ( "[," , "[" )
326- . replace ( ",," , "," )
327- . replace ( "{," , "{" )
328- . replace ( ",}" , "}" ) ;
329-
330- // Clean up leading commas in JSON strings (inside quotes)
331- // Pattern: ",[ --> "[
332- if let Ok ( leading_comma_in_string) = Regex :: new ( r#"",\["# ) {
333- rewritten = leading_comma_in_string
334- . replace_all ( & rewritten, r#""["# )
335- . into_owned ( ) ;
336- }
337344 }
338345
339346 changed. then_some ( rewritten)
0 commit comments