Skip to content

Commit fd5b91d

Browse files
committed
Fixed bug
1 parent dd02e05 commit fd5b91d

File tree

3 files changed

+169
-111
lines changed

3 files changed

+169
-111
lines changed

crates/common/src/integrations/nextjs/html_post_process.rs

Lines changed: 42 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -30,38 +30,62 @@ impl IntegrationHtmlPostProcessor for NextJsHtmlPostProcessor {
3030
}
3131

3232
fn should_process(&self, html: &str, ctx: &IntegrationHtmlContext<'_>) -> bool {
33-
let _ = html;
3433
if !self.config.enabled || self.config.rewrite_attributes.is_empty() {
3534
return false;
3635
}
3736

38-
let Some(state) = ctx
37+
// Check if we have captured placeholders from streaming
38+
if let Some(state) = ctx
3939
.document_state
4040
.get::<Mutex<NextJsRscPostProcessState>>(NEXTJS_INTEGRATION_ID)
41-
else {
42-
return false;
43-
};
41+
{
42+
let guard = state.lock().unwrap_or_else(|e| e.into_inner());
43+
if !guard.payloads.is_empty() {
44+
return true;
45+
}
46+
}
4447

45-
let guard = state.lock().unwrap_or_else(|e| e.into_inner());
46-
!guard.payloads.is_empty()
48+
// Also check if HTML contains RSC scripts that weren't captured during streaming
49+
// (e.g., fragmented scripts that we skipped during the streaming pass)
50+
html.contains("__next_f.push") && html.contains(ctx.origin_host)
4751
}
4852

4953
fn post_process(&self, html: &mut String, ctx: &IntegrationHtmlContext<'_>) -> bool {
50-
let Some(state) = ctx
54+
// Try to get payloads captured during streaming (placeholder approach)
55+
let payloads = ctx
5156
.document_state
5257
.get::<Mutex<NextJsRscPostProcessState>>(NEXTJS_INTEGRATION_ID)
53-
else {
54-
return false;
55-
};
56-
57-
let payloads = {
58-
let mut guard = state.lock().unwrap_or_else(|e| e.into_inner());
59-
guard.take_payloads()
60-
};
61-
if payloads.is_empty() {
62-
return false;
58+
.map(|state| {
59+
let mut guard = state.lock().unwrap_or_else(|e| e.into_inner());
60+
guard.take_payloads()
61+
})
62+
.unwrap_or_default();
63+
64+
if !payloads.is_empty() {
65+
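// Placeholder approach: substitute placeholders with rewritten payloads.
// NOTE(review): returning here skips the fallback re-parse below, so a document that mixes
// captured (unfragmented) and fragmented RSC scripts may leave the fragmented ones
// unrewritten; confirm whether substitute_placeholders also covers them.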
66+
return self.substitute_placeholders(html, ctx, payloads);
6367
}
6468

69+
// Fallback: re-parse HTML to find RSC scripts that weren't captured during streaming
70+
// (e.g., fragmented scripts that we skipped during the streaming pass)
71+
post_process_rsc_html_in_place_with_limit(
72+
html,
73+
ctx.origin_host,
74+
ctx.request_host,
75+
ctx.request_scheme,
76+
self.config.max_combined_payload_bytes,
77+
)
78+
}
79+
}
80+
81+
impl NextJsHtmlPostProcessor {
82+
/// Substitute placeholders with rewritten payloads (fast path for unfragmented scripts).
83+
fn substitute_placeholders(
84+
&self,
85+
html: &mut String,
86+
ctx: &IntegrationHtmlContext<'_>,
87+
payloads: Vec<String>,
88+
) -> bool {
6589
let payload_refs: Vec<&str> = payloads.iter().map(String::as_str).collect();
6690
let mut rewritten_payloads = rewrite_rsc_scripts_combined_with_limit(
6791
payload_refs.as_slice(),

crates/common/src/integrations/nextjs/mod.rs

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -419,4 +419,73 @@ mod tests {
419419
final_html
420420
);
421421
}
422+
423+
#[test]
424+
fn html_processor_preserves_non_rsc_scripts_with_chunked_streaming() {
425+
// Regression test: ensure non-RSC scripts are preserved when streamed alongside RSC scripts.
426+
// With small chunk sizes, scripts get fragmented and the buffering logic must correctly
427+
// handle non-RSC scripts without corrupting them.
428+
let html = r#"<html><body>
429+
<script>console.log("hello world");</script>
430+
<script>self.__next_f.push([1,'{"url":"https://origin.example.com/page"}'])</script>
431+
<script>window.analytics = { track: function(e) { console.log(e); } };</script>
432+
</body></html>"#;
433+
434+
let mut settings = create_test_settings();
435+
settings
436+
.integrations
437+
.insert_config(
438+
"nextjs",
439+
&json!({
440+
"enabled": true,
441+
"rewrite_attributes": ["url"],
442+
}),
443+
)
444+
.expect("should update nextjs config");
445+
let registry = IntegrationRegistry::new(&settings);
446+
let config = config_from_settings(&settings, &registry);
447+
let processor = create_html_processor(config);
448+
// Use small chunk size to force fragmentation
449+
let pipeline_config = PipelineConfig {
450+
input_compression: Compression::None,
451+
output_compression: Compression::None,
452+
chunk_size: 16,
453+
};
454+
let mut pipeline = StreamingPipeline::new(pipeline_config, processor);
455+
456+
let mut output = Vec::new();
457+
pipeline
458+
.process(Cursor::new(html.as_bytes()), &mut output)
459+
.unwrap();
460+
let final_html = String::from_utf8_lossy(&output);
461+
462+
// Non-RSC scripts should be preserved
463+
assert!(
464+
final_html.contains(r#"console.log("hello world");"#),
465+
"First non-RSC script should be preserved intact. Output: {}",
466+
final_html
467+
);
468+
assert!(
469+
final_html.contains("window.analytics"),
470+
"Third non-RSC script should be preserved. Output: {}",
471+
final_html
472+
);
473+
assert!(
474+
final_html.contains("track: function(e)"),
475+
"Third non-RSC script content should be intact. Output: {}",
476+
final_html
477+
);
478+
479+
// RSC scripts should be rewritten
480+
assert!(
481+
final_html.contains("test.example.com"),
482+
"RSC URL should be rewritten. Output: {}",
483+
final_html
484+
);
485+
assert!(
486+
!final_html.contains(RSC_PAYLOAD_PLACEHOLDER_PREFIX),
487+
"No placeholders should remain. Output: {}",
488+
final_html
489+
);
490+
}
422491
}
Lines changed: 58 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
use std::borrow::Cow;
21
use std::sync::{Arc, Mutex};
32

43
use crate::integrations::{
@@ -11,36 +10,18 @@ use super::{NextJsIntegrationConfig, NEXTJS_INTEGRATION_ID};
1110
pub(super) const RSC_PAYLOAD_PLACEHOLDER_PREFIX: &str = "__ts_rsc_payload_";
1211
pub(super) const RSC_PAYLOAD_PLACEHOLDER_SUFFIX: &str = "__";
1312

13+
/// State for RSC placeholder-based rewriting.
14+
///
15+
/// Stores RSC payloads extracted during streaming for later rewriting during post-processing.
16+
/// Only unfragmented RSC scripts are processed during streaming; fragmented scripts are
17+
/// handled by the post-processor which re-parses the final HTML.
1418
#[derive(Default)]
1519
pub(super) struct NextJsRscPostProcessState {
1620
pub(super) payloads: Vec<String>,
17-
buffer: String,
18-
buffering: bool,
1921
}
2022

2123
impl NextJsRscPostProcessState {
22-
fn buffer_chunk(&mut self, chunk: &str) {
23-
if !self.buffering {
24-
self.buffering = true;
25-
self.buffer.clear();
26-
}
27-
self.buffer.push_str(chunk);
28-
}
29-
30-
/// Returns the complete script content, either borrowed from input or owned from buffer.
31-
fn take_script_or_borrow<'a>(&mut self, chunk: &'a str) -> Cow<'a, str> {
32-
if self.buffering {
33-
self.buffer.push_str(chunk);
34-
self.buffering = false;
35-
Cow::Owned(std::mem::take(&mut self.buffer))
36-
} else {
37-
Cow::Borrowed(chunk)
38-
}
39-
}
40-
4124
pub(super) fn take_payloads(&mut self) -> Vec<String> {
42-
self.buffer.clear();
43-
self.buffering = false;
4425
std::mem::take(&mut self.payloads)
4526
}
4627
}
@@ -73,84 +54,48 @@ impl IntegrationScriptRewriter for NextJsRscPlaceholderRewriter {
7354
return ScriptRewriteAction::keep();
7455
}
7556

57+
// Only process complete (unfragmented) scripts during streaming.
58+
// Fragmented scripts are handled by the post-processor which re-parses the final HTML.
59+
// This avoids corrupting non-RSC scripts that happen to be fragmented during streaming.
7660
if !ctx.is_last_in_text_node {
77-
if let Some(existing) = ctx
78-
.document_state
79-
.get::<Mutex<NextJsRscPostProcessState>>(NEXTJS_INTEGRATION_ID)
80-
{
81-
let mut guard = existing.lock().unwrap_or_else(|e| e.into_inner());
82-
if guard.buffering {
83-
guard.buffer_chunk(content);
84-
return ScriptRewriteAction::remove_node();
85-
}
86-
}
87-
88-
let trimmed = content.trim_start();
89-
if trimmed.starts_with('{') || trimmed.starts_with('[') {
90-
// Avoid interfering with other inline JSON scripts (e.g. `__NEXT_DATA__`, JSON-LD).
91-
return ScriptRewriteAction::keep();
92-
}
93-
94-
let state = ctx
95-
.document_state
96-
.get_or_insert_with(NEXTJS_INTEGRATION_ID, || {
97-
Mutex::new(NextJsRscPostProcessState::default())
98-
});
99-
let mut guard = state.lock().unwrap_or_else(|e| e.into_inner());
100-
guard.buffer_chunk(content);
101-
return ScriptRewriteAction::remove_node();
102-
}
103-
104-
if !content.contains("__next_f")
105-
&& ctx
106-
.document_state
107-
.get::<Mutex<NextJsRscPostProcessState>>(NEXTJS_INTEGRATION_ID)
108-
.is_none()
109-
{
61+
// Script is fragmented - skip placeholder processing.
62+
// The post-processor will handle RSC scripts at end-of-document.
11063
return ScriptRewriteAction::keep();
11164
}
11265

113-
let state = ctx
114-
.document_state
115-
.get_or_insert_with(NEXTJS_INTEGRATION_ID, || {
116-
Mutex::new(NextJsRscPostProcessState::default())
117-
});
118-
let mut guard = state.lock().unwrap_or_else(|e| e.into_inner());
119-
let script = guard.take_script_or_borrow(content);
120-
let was_buffered = matches!(script, Cow::Owned(_));
121-
122-
if !script.contains("__next_f") {
123-
if was_buffered {
124-
return ScriptRewriteAction::replace(script.into_owned());
125-
}
66+
// Quick check: skip scripts that can't be RSC payloads
67+
if !content.contains("__next_f") {
12668
return ScriptRewriteAction::keep();
12769
}
12870

129-
let Some((payload_start, payload_end)) = find_rsc_push_payload_range(&script) else {
130-
if was_buffered {
131-
return ScriptRewriteAction::replace(script.into_owned());
132-
}
71+
let Some((payload_start, payload_end)) = find_rsc_push_payload_range(content) else {
72+
// Contains __next_f but doesn't match RSC push pattern - leave unchanged
13373
return ScriptRewriteAction::keep();
13474
};
13575

13676
if payload_start > payload_end
137-
|| payload_end > script.len()
138-
|| !script.is_char_boundary(payload_start)
139-
|| !script.is_char_boundary(payload_end)
77+
|| payload_end > content.len()
78+
|| !content.is_char_boundary(payload_start)
79+
|| !content.is_char_boundary(payload_end)
14080
{
141-
if was_buffered {
142-
return ScriptRewriteAction::replace(script.into_owned());
143-
}
14481
return ScriptRewriteAction::keep();
14582
}
14683

84+
// Insert placeholder for this RSC payload and store original for post-processing
85+
let state = ctx
86+
.document_state
87+
.get_or_insert_with(NEXTJS_INTEGRATION_ID, || {
88+
Mutex::new(NextJsRscPostProcessState::default())
89+
});
90+
let mut guard = state.lock().unwrap_or_else(|e| e.into_inner());
91+
14792
let placeholder_index = guard.payloads.len();
14893
let placeholder = rsc_payload_placeholder(placeholder_index);
14994
guard
15095
.payloads
151-
.push(script[payload_start..payload_end].to_string());
96+
.push(content[payload_start..payload_end].to_string());
15297

153-
let mut rewritten = script.into_owned();
98+
let mut rewritten = content.to_string();
15499
rewritten.replace_range(payload_start..payload_end, &placeholder);
155100
ScriptRewriteAction::replace(rewritten)
156101
}
@@ -211,32 +156,52 @@ mod tests {
211156
}
212157

213158
#[test]
214-
fn buffers_fragmented_scripts_and_emits_single_replacement() {
159+
fn skips_fragmented_scripts_for_post_processor_handling() {
160+
// Fragmented scripts are not processed during streaming - they're passed through
161+
// unchanged and handled by the post-processor which re-parses the final HTML.
215162
let state = IntegrationDocumentState::default();
216163
let rewriter = NextJsRscPlaceholderRewriter::new(test_config());
217164

218165
let first = "self.__next_f.push([1,\"https://origin.example.com";
219166
let second = "/page\"])";
220167

168+
// Intermediate chunk should be kept (not processed)
221169
let action_first = rewriter.rewrite(first, &ctx(false, &state));
222170
assert_eq!(
223171
action_first,
224-
ScriptRewriteAction::RemoveNode,
225-
"Intermediate chunk should be removed"
172+
ScriptRewriteAction::Keep,
173+
"Intermediate chunk should be kept unchanged"
226174
);
227175

176+
// Final chunk should also be kept: on its own it contains no `__next_f` marker,
// so the rewriter does not treat it as an RSC script.
228177
let action_second = rewriter.rewrite(second, &ctx(true, &state));
229-
let ScriptRewriteAction::Replace(rewritten) = action_second else {
230-
panic!("Final chunk should be replaced with combined output");
231-
};
178+
assert_eq!(
179+
action_second,
180+
ScriptRewriteAction::Keep,
181+
"Final chunk of fragmented script should be kept"
182+
);
232183

184+
// No payloads should be stored - post-processor will handle this
233185
assert!(
234-
rewritten.contains(RSC_PAYLOAD_PLACEHOLDER_PREFIX),
235-
"Combined output should include placeholder. Got: {rewritten}"
186+
state
187+
.get::<Mutex<NextJsRscPostProcessState>>(NEXTJS_INTEGRATION_ID)
188+
.is_none(),
189+
"No RSC state should be created for fragmented scripts"
236190
);
237-
assert!(
238-
rewritten.contains("self.__next_f.push"),
239-
"Combined output should keep the push call. Got: {rewritten}"
191+
}
192+
193+
#[test]
194+
fn skips_non_rsc_scripts() {
195+
let state = IntegrationDocumentState::default();
196+
let rewriter = NextJsRscPlaceholderRewriter::new(test_config());
197+
198+
let script = r#"console.log("hello world");"#;
199+
let action = rewriter.rewrite(script, &ctx(true, &state));
200+
201+
assert_eq!(
202+
action,
203+
ScriptRewriteAction::Keep,
204+
"Non-RSC scripts should be kept unchanged"
240205
);
241206
}
242207
}

0 commit comments

Comments
 (0)