@@ -10,6 +10,10 @@ static TCHUNK_PATTERN: Lazy<Regex> =
1010/// Marker used to track script boundaries when combining RSC content.
1111pub ( crate ) const RSC_MARKER : & str = "\x00 SPLIT\x00 " ;
1212
/// Maximum combined payload size, in bytes, for cross-script processing (10 MiB).
///
/// Payload sets whose combined size exceeds this limit are processed individually,
/// without cross-script T-chunk handling.
const MAX_COMBINED_PAYLOAD_SIZE: usize = 10 * 1024 * 1024;
16+
1317// =============================================================================
1418// Escape Sequence Parsing
1519// =============================================================================
@@ -299,6 +303,7 @@ impl RscUrlRewriter {
299303 return Cow :: Borrowed ( input) ;
300304 }
301305
306+ // Phase 1: Regex-based URL pattern rewriting (handles escaped slashes, schemes, etc.)
302307 let replaced = self
303308 . pattern
304309 . replace_all ( input, |caps : & regex:: Captures < ' _ > | {
@@ -310,18 +315,20 @@ impl RscUrlRewriter {
310315 }
311316 } ) ;
312317
313- let still_contains_origin = match & replaced {
314- Cow :: Borrowed ( s) => s. contains ( & self . origin_host ) ,
315- Cow :: Owned ( s) => s. contains ( & self . origin_host ) ,
318+ // Phase 2: Handle bare host occurrences not matched by the URL regex
319+ // (e.g., `siteProductionDomain`). The check runs on the regex output whether
320+ // or not the regex changed anything; return early if no origin_host remains.
321+ let text = match & replaced {
322+ Cow :: Borrowed ( s) => * s,
323+ Cow :: Owned ( s) => s. as_str ( ) ,
316324 } ;
317325
318- if !still_contains_origin {
326+ if !text . contains ( & self . origin_host ) {
319327 return replaced;
320328 }
321329
322- // Also rewrite bare host occurrences inside RSC payloads (e.g. `siteProductionDomain`).
323- let owned = replaced. into_owned ( ) ;
324- Cow :: Owned ( owned. replace ( & self . origin_host , & self . request_host ) )
330+ // Bare host replacement needed
331+ Cow :: Owned ( text. replace ( & self . origin_host , & self . request_host ) )
325332 }
326333
327334 pub ( crate ) fn rewrite_to_string ( & self , input : & str ) -> String {
@@ -398,7 +405,26 @@ pub fn rewrite_rsc_scripts_combined(
398405 return vec ! [ rewrite_rsc_tchunks_with_rewriter( payloads[ 0 ] , & rewriter) ] ;
399406 }
400407
401- let mut combined = payloads[ 0 ] . to_string ( ) ;
408+ // Check total size before allocating combined buffer
409+ let total_size: usize =
410+ payloads. iter ( ) . map ( |p| p. len ( ) ) . sum :: < usize > ( ) + ( payloads. len ( ) - 1 ) * RSC_MARKER . len ( ) ;
411+
412+ if total_size > MAX_COMBINED_PAYLOAD_SIZE {
413+ // Fall back to individual processing if combined size is too large.
414+ // This sacrifices cross-script T-chunk correctness for memory safety.
415+ log:: warn!(
416+ "RSC combined payload size {} exceeds limit {}, processing individually" ,
417+ total_size,
418+ MAX_COMBINED_PAYLOAD_SIZE
419+ ) ;
420+ return payloads
421+ . iter ( )
422+ . map ( |p| rewrite_rsc_tchunks_with_rewriter ( p, & rewriter) )
423+ . collect ( ) ;
424+ }
425+
426+ let mut combined = String :: with_capacity ( total_size) ;
427+ combined. push_str ( payloads[ 0 ] ) ;
402428 for payload in & payloads[ 1 ..] {
403429 combined. push_str ( RSC_MARKER ) ;
404430 combined. push_str ( payload) ;
@@ -591,4 +617,80 @@ mod tests {
591617 "Bare host should be rewritten inside RSC payload. Got: {rewritten}"
592618 ) ;
593619 }
620+
#[test]
fn single_payload_bypasses_combining() {
    // When there's only one payload, we should process it directly without combining.
    // T-chunk content: {"url":"https://origin.example.com/x"} = 38 bytes = 0x26 hex,
    // so the declared length covers the whole JSON object, closing brace included.
    let payload = r#"1a:T26,{"url":"https://origin.example.com/x"}"#;
    let payloads: Vec<&str> = vec![payload];

    let results = rewrite_rsc_scripts_combined(
        &payloads,
        "origin.example.com",
        "test.example.com",
        "https",
    );

    assert_eq!(results.len(), 1);
    assert!(
        results[0].contains("test.example.com"),
        "Single payload should be rewritten. Got: {}",
        results[0]
    );
    // The replacement host is two bytes shorter, so the header must shrink with it:
    // {"url":"https://test.example.com/x"} = 36 bytes = 0x24 hex.
    assert!(
        results[0].contains(":T24,"),
        "T-chunk length should be updated. Got: {}",
        results[0]
    );
}
649+
#[test]
fn empty_payloads_returns_empty() {
    // An empty input slice must yield an empty result list.
    let no_payloads: Vec<&str> = Vec::new();

    let rewritten = rewrite_rsc_scripts_combined(
        &no_payloads,
        "origin.example.com",
        "test.example.com",
        "https",
    );

    assert!(rewritten.is_empty());
}
661+
#[test]
fn no_origin_in_payloads_returns_unchanged() {
    let first = r#"1a:T10,{"key":"value"}"#;
    let second = r#"1b:T10,{"foo":"bar"}"#;
    let payloads: Vec<&str> = vec![first, second];

    let results = rewrite_rsc_scripts_combined(
        &payloads,
        "origin.example.com",
        "test.example.com",
        "https",
    );

    assert_eq!(results.len(), 2);

    // Neither host may show up: the inputs contain no URLs at all. T-chunk lengths
    // may still be recalculated, so only the host-free property is checked here.
    for rewritten in &results {
        let mentions_host =
            rewritten.contains("origin.example.com") || rewritten.contains("test.example.com");
        assert!(
            !mentions_host,
            "No host should be present in payload without URLs"
        );
    }

    // The JSON body following each T-chunk header must come through intact.
    let expected_bodies = [r#"{"key":"value"}"#, r#"{"foo":"bar"}"#];
    for (rewritten, body) in results.iter().zip(expected_bodies) {
        assert!(
            rewritten.contains(body),
            "Content should be preserved. Got: {}",
            rewritten
        );
    }
}
594696}
0 commit comments