@@ -5,6 +5,7 @@ use std::cell::Cell;
55use std:: rc:: Rc ;
66
77use lol_html:: { element, html_content:: ContentType , text, Settings as RewriterSettings } ;
8+ use regex:: Regex ;
89
910use crate :: settings:: Settings ;
1011use crate :: streaming_processor:: { HtmlRewriterAdapter , StreamProcessor } ;
@@ -17,7 +18,8 @@ pub struct HtmlProcessorConfig {
1718 pub request_host : String ,
1819 pub request_scheme : String ,
1920 pub enable_prebid : bool ,
20- pub nextjs_rewrite_urls : bool ,
21+ pub nextjs_enabled : bool ,
22+ pub nextjs_attributes : Vec < String > ,
2123}
2224
2325impl HtmlProcessorConfig {
@@ -33,7 +35,8 @@ impl HtmlProcessorConfig {
3335 request_host : request_host. to_string ( ) ,
3436 request_scheme : request_scheme. to_string ( ) ,
3537 enable_prebid : settings. prebid . auto_configure ,
36- nextjs_rewrite_urls : settings. publisher . nextjs . rewrite_urls ,
38+ nextjs_enabled : settings. publisher . nextjs . enabled ,
39+ nextjs_attributes : settings. publisher . nextjs . rewrite_attributes . clone ( ) ,
3740 }
3841 }
3942}
@@ -68,29 +71,32 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
6871 format ! ( "//{}" , self . request_host)
6972 }
7073
71- fn rewrite_nextjs_hrefs ( & self , content : & str ) -> Option < String > {
74+ fn rewrite_nextjs_values ( & self , content : & str , attributes : & [ String ] ) -> Option < String > {
7275 let mut rewritten = content. to_string ( ) ;
7376 let mut changed = false ;
74- for prefix in [ "\" href\" :\" " , "\\ \" href\\ \" :\\ \" " ] {
75- let https_pattern = format ! ( "{}https://{}" , prefix, self . origin_host) ;
76- let http_pattern = format ! ( "{}http://{}" , prefix, self . origin_host) ;
77- let proto_pattern = format ! ( "{}//{}" , prefix, self . origin_host) ;
78-
79- let href_replacement =
80- format ! ( "{}{}://{}" , prefix, self . request_scheme, self . request_host) ;
81- let proto_replacement = format ! ( "{}//{}" , prefix, self . request_host) ;
82-
83- let new_rewritten = rewritten
84- . replace ( & https_pattern, & href_replacement)
85- . replace ( & http_pattern, & href_replacement)
86- . replace ( & proto_pattern, & proto_replacement) ;
87-
88- if new_rewritten != rewritten {
77+ let escaped_origin = regex:: escape ( & self . origin_host ) ;
78+ for attribute in attributes {
79+ let escaped_attr = regex:: escape ( attribute) ;
80+ let pattern = format ! (
81+ r#"(?P<prefix>(?:\\*")?{attr}(?:\\*")?:\\*")(?P<scheme>https?://|//){origin}"# ,
82+ attr = escaped_attr,
83+ origin = escaped_origin
84+ ) ;
85+ let regex = Regex :: new ( & pattern) . expect ( "valid Next.js rewrite regex" ) ;
86+ let new_value = regex. replace_all ( & rewritten, |caps : & regex:: Captures | {
87+ let scheme = & caps[ "scheme" ] ;
88+ let replacement = if scheme == "//" {
89+ format ! ( "//{}" , self . request_host)
90+ } else {
91+ self . replacement_url ( )
92+ } ;
93+ format ! ( "{}{}" , & caps[ "prefix" ] , replacement)
94+ } ) ;
95+ if new_value != rewritten {
8996 changed = true ;
90- rewritten = new_rewritten ;
97+ rewritten = new_value . into_owned ( ) ;
9198 }
9299 }
93-
94100 if changed {
95101 Some ( rewritten)
96102 } else {
@@ -105,6 +111,8 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
105111 request_scheme : config. request_scheme . clone ( ) ,
106112 } ) ;
107113
114+ let nextjs_attributes = Rc :: new ( config. nextjs_attributes . clone ( ) ) ;
115+
108116 let injected_tsjs = Rc :: new ( Cell :: new ( false ) ) ;
109117
110118 fn is_prebid_script_url ( url : & str ) -> bool {
@@ -230,12 +238,13 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
230238 } ) ,
231239 ] ;
232240
233- if config. nextjs_rewrite_urls {
241+ if config. nextjs_enabled && !nextjs_attributes . is_empty ( ) {
234242 element_content_handlers. push ( text ! ( "script#__NEXT_DATA__" , {
235243 let patterns = patterns. clone( ) ;
244+ let attributes = nextjs_attributes. clone( ) ;
236245 move |text| {
237246 let content = text. as_str( ) ;
238- if let Some ( rewritten) = patterns. rewrite_nextjs_hrefs ( content) {
247+ if let Some ( rewritten) = patterns. rewrite_nextjs_values ( content, & attributes ) {
239248 text. replace( & rewritten, ContentType :: Text ) ;
240249 }
241250 Ok ( ( ) )
@@ -244,12 +253,13 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
244253
245254 element_content_handlers. push ( text ! ( "script" , {
246255 let patterns = patterns. clone( ) ;
256+ let attributes = nextjs_attributes. clone( ) ;
247257 move |text| {
248258 let content = text. as_str( ) ;
249259 if !content. contains( "self.__next_f" ) {
250260 return Ok ( ( ) ) ;
251261 }
252- if let Some ( rewritten) = patterns. rewrite_nextjs_hrefs ( content) {
262+ if let Some ( rewritten) = patterns. rewrite_nextjs_values ( content, & attributes ) {
253263 text. replace( & rewritten, ContentType :: Text ) ;
254264 }
255265 Ok ( ( ) )
@@ -299,7 +309,8 @@ mod tests {
299309 request_host : "test.example.com" . to_string ( ) ,
300310 request_scheme : "https" . to_string ( ) ,
301311 enable_prebid : false ,
302- nextjs_rewrite_urls : false ,
312+ nextjs_enabled : false ,
313+ nextjs_attributes : vec ! [ "href" . to_string( ) , "link" . to_string( ) , "url" . to_string( ) ] ,
303314 }
304315 }
305316
@@ -389,7 +400,8 @@ mod tests {
389400 </body></html>"# ;
390401
391402 let mut config = create_test_config ( ) ;
392- config. nextjs_rewrite_urls = true ;
403+ config. nextjs_enabled = true ;
404+ config. nextjs_attributes = vec ! [ "href" . to_string( ) , "link" . to_string( ) , "url" . to_string( ) ] ;
393405 let processor = create_html_processor ( config) ;
394406 let pipeline_config = PipelineConfig {
395407 input_compression : Compression :: None ,
@@ -403,6 +415,8 @@ mod tests {
403415 . process ( Cursor :: new ( html. as_bytes ( ) ) , & mut output)
404416 . unwrap ( ) ;
405417 let processed = String :: from_utf8_lossy ( & output) ;
418+ println ! ( "processed={processed}" ) ;
419+ println ! ( "processed stream payload: {}" , processed) ;
406420 println ! ( "processed stream payload: {}" , processed) ;
407421
408422 assert ! (
@@ -435,12 +449,13 @@ mod tests {
435449 fn test_rewrites_nextjs_stream_payload ( ) {
436450 let html = r#"<html><body>
437451 <script>
438- self.__next_f.push([1,"chunk", "prefix {\"inner\":\"value\"} \"href\":\"http://origin.example.com/dashboard\", \"href\ ":\"https://origin.example.com/api-test\" suffix", {"dataHost":"https://origin.example.com/api"}]);
452+ self.__next_f.push([1,"chunk", "prefix {\"inner\":\"value\"} \\\ "href\\\ ":\\\ "http://origin.example.com/dashboard\\\ ", \\\"link\\\ ":\\\ "https://origin.example.com/api-test\\\ " suffix", {"href":"http://origin.example.com/secondary", "dataHost":"https://origin.example.com/api"}]);
439453 </script>
440454 </body></html>"# ;
441455
442456 let mut config = create_test_config ( ) ;
443- config. nextjs_rewrite_urls = true ;
457+ config. nextjs_enabled = true ;
458+ config. nextjs_attributes = vec ! [ "href" . to_string( ) , "link" . to_string( ) , "url" . to_string( ) ] ;
444459 let processor = create_html_processor ( config) ;
445460 let pipeline_config = PipelineConfig {
446461 input_compression : Compression :: None ,
@@ -454,18 +469,19 @@ mod tests {
454469 . process ( Cursor :: new ( html. as_bytes ( ) ) , & mut output)
455470 . unwrap ( ) ;
456471 let processed = String :: from_utf8_lossy ( & output) ;
472+ let normalized = processed. replace ( '\\' , "" ) ;
457473 assert ! (
458- processed. contains( "https://test.example.com/dashboard" ) ,
459- "Should rewrite URLs inside streamed Next.js payloads"
474+ normalized. contains( "\" href\" :\" https://test.example.com/dashboard\" " ) ,
475+ "Should rewrite escaped href sequences inside streamed payloads. Content: {}" ,
476+ normalized
460477 ) ;
461478 assert ! (
462- processed. contains( "\\ \" href\\ \" :\\ \" https://test.example.com/api-test\\ \" " )
463- || processed. contains( "\" href\" :\" https://test.example.com/api-test\" " ) ,
464- "Should rewrite escaped href sequences inside streamed payloads"
479+ normalized. contains( "\" href\" :\" https://test.example.com/secondary\" " ) ,
480+ "Should rewrite plain href attributes inside streamed payloads"
465481 ) ;
466482 assert ! (
467- !processed . contains( "\" href \" :\" http ://origin .example.com/dashboard \" " ) ,
468- "Should remove origin host references from href fields "
483+ normalized . contains( "\" link \" :\" https ://test .example.com/api-test \" " ) ,
484+ "Should rewrite additional configured attributes like link "
469485 ) ;
470486 assert ! (
471487 processed. contains( "\" dataHost\" :\" https://origin.example.com/api\" " ) ,
@@ -554,9 +570,14 @@ mod tests {
554570 assert_eq ! ( config. request_scheme, "https" ) ;
555571 assert ! ( config. enable_prebid) ; // Uses default true
556572 assert ! (
557- !config. nextjs_rewrite_urls ,
573+ !config. nextjs_enabled ,
558574 "Next.js rewrites should default to disabled"
559575 ) ;
576+ assert_eq ! (
577+ config. nextjs_attributes,
578+ vec![ "href" . to_string( ) , "link" . to_string( ) , "url" . to_string( ) ] ,
579+ "Should default to rewriting href/link/url attributes"
580+ ) ;
560581 }
561582
562583 #[ test]
0 commit comments