Skip to content

Commit 90077ba

Browse files
authored
Refactor Next.js integration (#119)
1 parent e8e13f4 commit 90077ba

File tree

7 files changed

+543
-383
lines changed

7 files changed

+543
-383
lines changed

crates/common/src/html_processor.rs

Lines changed: 1 addition & 317 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@ use std::cell::Cell;
55
use std::rc::Rc;
66

77
use lol_html::{element, html_content::ContentType, text, Settings as RewriterSettings};
8-
use regex::Regex;
98

109
use crate::integrations::{
1110
AttributeRewriteOutcome, IntegrationAttributeContext, IntegrationRegistry,
@@ -22,14 +21,12 @@ pub struct HtmlProcessorConfig {
2221
pub request_host: String,
2322
pub request_scheme: String,
2423
pub integrations: IntegrationRegistry,
25-
pub nextjs_enabled: bool,
26-
pub nextjs_attributes: Vec<String>,
2724
}
2825

2926
impl HtmlProcessorConfig {
3027
/// Create from settings and request parameters
3128
pub fn from_settings(
32-
settings: &Settings,
29+
_settings: &Settings,
3330
integrations: &IntegrationRegistry,
3431
origin_host: &str,
3532
request_host: &str,
@@ -40,8 +37,6 @@ impl HtmlProcessorConfig {
4037
request_host: request_host.to_string(),
4138
request_scheme: request_scheme.to_string(),
4239
integrations: integrations.clone(),
43-
nextjs_enabled: settings.publisher.nextjs.enabled,
44-
nextjs_attributes: settings.publisher.nextjs.rewrite_attributes.clone(),
4540
}
4641
}
4742
}
@@ -75,39 +70,6 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
7570
fn protocol_relative_replacement(&self) -> String {
7671
format!("//{}", self.request_host)
7772
}
78-
79-
fn rewrite_nextjs_values(&self, content: &str, attributes: &[String]) -> Option<String> {
80-
let mut rewritten = content.to_string();
81-
let mut changed = false;
82-
let escaped_origin = regex::escape(&self.origin_host);
83-
for attribute in attributes {
84-
let escaped_attr = regex::escape(attribute);
85-
let pattern = format!(
86-
r#"(?P<prefix>(?:\\*")?{attr}(?:\\*")?:\\*")(?P<scheme>https?://|//){origin}"#,
87-
attr = escaped_attr,
88-
origin = escaped_origin
89-
);
90-
let regex = Regex::new(&pattern).expect("valid Next.js rewrite regex");
91-
let new_value = regex.replace_all(&rewritten, |caps: &regex::Captures| {
92-
let scheme = &caps["scheme"];
93-
let replacement = if scheme == "//" {
94-
format!("//{}", self.request_host)
95-
} else {
96-
self.replacement_url()
97-
};
98-
format!("{}{}", &caps["prefix"], replacement)
99-
});
100-
if new_value != rewritten {
101-
changed = true;
102-
rewritten = new_value.into_owned();
103-
}
104-
}
105-
if changed {
106-
Some(rewritten)
107-
} else {
108-
None
109-
}
110-
}
11173
}
11274

11375
let patterns = Rc::new(UrlPatterns {
@@ -116,8 +78,6 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
11678
request_scheme: config.request_scheme.clone(),
11779
});
11880

119-
let nextjs_attributes = Rc::new(config.nextjs_attributes.clone());
120-
12181
let injected_tsjs = Rc::new(Cell::new(false));
12282
let integration_registry = config.integrations.clone();
12383
let script_rewriters = integration_registry.script_rewriters();
@@ -378,35 +338,6 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
378338
}));
379339
}
380340

381-
if config.nextjs_enabled && !nextjs_attributes.is_empty() {
382-
element_content_handlers.push(text!("script#__NEXT_DATA__", {
383-
let patterns = patterns.clone();
384-
let attributes = nextjs_attributes.clone();
385-
move |text| {
386-
let content = text.as_str();
387-
if let Some(rewritten) = patterns.rewrite_nextjs_values(content, &attributes) {
388-
text.replace(&rewritten, ContentType::Text);
389-
}
390-
Ok(())
391-
}
392-
}));
393-
394-
element_content_handlers.push(text!("script", {
395-
let patterns = patterns.clone();
396-
let attributes = nextjs_attributes.clone();
397-
move |text| {
398-
let content = text.as_str();
399-
if !content.contains("self.__next_f") {
400-
return Ok(());
401-
}
402-
if let Some(rewritten) = patterns.rewrite_nextjs_values(content, &attributes) {
403-
text.replace(&rewritten, ContentType::Text);
404-
}
405-
Ok(())
406-
}
407-
}));
408-
}
409-
410341
let rewriter_settings = RewriterSettings {
411342
element_content_handlers,
412343
..RewriterSettings::default()
@@ -433,116 +364,9 @@ mod tests {
433364
request_host: "test.example.com".to_string(),
434365
request_scheme: "https".to_string(),
435366
integrations: IntegrationRegistry::default(),
436-
nextjs_enabled: false,
437-
nextjs_attributes: vec!["href".to_string(), "link".to_string(), "url".to_string()],
438367
}
439368
}
440369

441-
fn config_from_settings(
442-
settings: &Settings,
443-
registry: &IntegrationRegistry,
444-
) -> HtmlProcessorConfig {
445-
HtmlProcessorConfig::from_settings(
446-
settings,
447-
registry,
448-
"origin.example.com",
449-
"test.example.com",
450-
"https",
451-
)
452-
}
453-
454-
#[test]
455-
fn test_always_injects_tsjs_script() {
456-
let html = r#"<html><head>
457-
<script src="/js/prebid.min.js"></script>
458-
<link rel="preload" as="script" href="https://cdn.prebid.org/prebid.js" />
459-
</head><body></body></html>"#;
460-
461-
let mut settings = create_test_settings();
462-
settings
463-
.integrations
464-
.insert_config(
465-
"prebid",
466-
&json!({
467-
"enabled": true,
468-
"server_url": "https://test-prebid.com/openrtb2/auction",
469-
"timeout_ms": 1000,
470-
"bidders": ["mocktioneer"],
471-
"auto_configure": false,
472-
"debug": false
473-
}),
474-
)
475-
.expect("should update prebid config");
476-
let registry = IntegrationRegistry::new(&settings);
477-
let config = config_from_settings(&settings, &registry);
478-
let processor = create_html_processor(config);
479-
let pipeline_config = PipelineConfig {
480-
input_compression: Compression::None,
481-
output_compression: Compression::None,
482-
chunk_size: 8192,
483-
};
484-
let mut pipeline = StreamingPipeline::new(pipeline_config, processor);
485-
486-
let mut output = Vec::new();
487-
let result = pipeline.process(Cursor::new(html.as_bytes()), &mut output);
488-
assert!(result.is_ok());
489-
let processed = String::from_utf8_lossy(&output);
490-
// When auto-configure is disabled, do not rewrite Prebid references
491-
assert!(processed.contains("/js/prebid.min.js"));
492-
assert!(processed.contains("cdn.prebid.org/prebid.js"));
493-
assert!(processed.contains("tsjs-unified"));
494-
}
495-
496-
#[test]
497-
fn prebid_auto_config_removes_prebid_scripts() {
498-
let html = r#"<html><head>
499-
<script src="https://cdn.prebid.org/prebid.min.js"></script>
500-
<link rel="preload" as="script" href="https://cdn.prebid.org/prebid.js" />
501-
</head><body></body></html>"#;
502-
503-
let mut settings = create_test_settings();
504-
settings
505-
.integrations
506-
.insert_config(
507-
"prebid",
508-
&json!({
509-
"enabled": true,
510-
"server_url": "https://test-prebid.com/openrtb2/auction",
511-
"timeout_ms": 1000,
512-
"bidders": ["mocktioneer"],
513-
"auto_configure": true,
514-
"debug": false
515-
}),
516-
)
517-
.expect("should update prebid config");
518-
let registry = IntegrationRegistry::new(&settings);
519-
let config = config_from_settings(&settings, &registry);
520-
let processor = create_html_processor(config);
521-
let pipeline_config = PipelineConfig {
522-
input_compression: Compression::None,
523-
output_compression: Compression::None,
524-
chunk_size: 8192,
525-
};
526-
let mut pipeline = StreamingPipeline::new(pipeline_config, processor);
527-
528-
let mut output = Vec::new();
529-
let result = pipeline.process(Cursor::new(html.as_bytes()), &mut output);
530-
assert!(result.is_ok());
531-
let processed = String::from_utf8_lossy(&output);
532-
assert!(
533-
processed.contains("tsjs-unified"),
534-
"Unified bundle should be injected"
535-
);
536-
assert!(
537-
!processed.contains("prebid.min.js"),
538-
"Prebid script should be removed"
539-
);
540-
assert!(
541-
!processed.contains("cdn.prebid.org/prebid.js"),
542-
"Prebid preload should be removed"
543-
);
544-
}
545-
546370
#[test]
547371
fn integration_attribute_rewriter_can_remove_elements() {
548372
struct RemovingLinkRewriter;
@@ -597,137 +421,6 @@ mod tests {
597421
assert!(!processed.contains("remove-me"));
598422
}
599423

600-
#[test]
601-
fn test_rewrites_nextjs_script_when_enabled() {
602-
let html = r#"<html><body>
603-
<script id="__NEXT_DATA__" type="application/json">
604-
{"props":{"pageProps":{"primary":{"href":"https://origin.example.com/reviews"},"secondary":{"href":"http://origin.example.com/sign-in"},"fallbackHref":"http://origin.example.com/legacy","protoRelative":"//origin.example.com/assets/logo.png"}}}
605-
</script>
606-
</body></html>"#;
607-
608-
let mut config = create_test_config();
609-
config.nextjs_enabled = true;
610-
config.nextjs_attributes = vec!["href".to_string(), "link".to_string(), "url".to_string()];
611-
let processor = create_html_processor(config);
612-
let pipeline_config = PipelineConfig {
613-
input_compression: Compression::None,
614-
output_compression: Compression::None,
615-
chunk_size: 8192,
616-
};
617-
let mut pipeline = StreamingPipeline::new(pipeline_config, processor);
618-
619-
let mut output = Vec::new();
620-
pipeline
621-
.process(Cursor::new(html.as_bytes()), &mut output)
622-
.unwrap();
623-
let processed = String::from_utf8_lossy(&output);
624-
println!("processed={processed}");
625-
println!("processed stream payload: {}", processed);
626-
println!("processed stream payload: {}", processed);
627-
628-
assert!(
629-
processed.contains(r#""href":"https://test.example.com/reviews""#),
630-
"Should rewrite https Next.js href values"
631-
);
632-
assert!(
633-
processed.contains(r#""href":"https://test.example.com/sign-in""#),
634-
"Should rewrite http Next.js href values"
635-
);
636-
assert!(
637-
processed.contains(r#""fallbackHref":"http://origin.example.com/legacy""#),
638-
"Should leave other fields untouched"
639-
);
640-
assert!(
641-
processed.contains(r#""protoRelative":"//origin.example.com/assets/logo.png""#),
642-
"Should not rewrite non-href keys"
643-
);
644-
assert!(
645-
!processed.contains("\"href\":\"https://origin.example.com/reviews\""),
646-
"Should remove origin https href"
647-
);
648-
assert!(
649-
!processed.contains("\"href\":\"http://origin.example.com/sign-in\""),
650-
"Should remove origin http href"
651-
);
652-
}
653-
654-
#[test]
655-
fn test_rewrites_nextjs_stream_payload() {
656-
let html = r#"<html><body>
657-
<script>
658-
self.__next_f.push([1,"chunk", "prefix {\"inner\":\"value\"} \\\"href\\\":\\\"http://origin.example.com/dashboard\\\", \\\"link\\\":\\\"https://origin.example.com/api-test\\\" suffix", {"href":"http://origin.example.com/secondary","dataHost":"https://origin.example.com/api"}]);
659-
</script>
660-
</body></html>"#;
661-
662-
let mut config = create_test_config();
663-
config.nextjs_enabled = true;
664-
config.nextjs_attributes = vec!["href".to_string(), "link".to_string(), "url".to_string()];
665-
let processor = create_html_processor(config);
666-
let pipeline_config = PipelineConfig {
667-
input_compression: Compression::None,
668-
output_compression: Compression::None,
669-
chunk_size: 8192,
670-
};
671-
let mut pipeline = StreamingPipeline::new(pipeline_config, processor);
672-
673-
let mut output = Vec::new();
674-
pipeline
675-
.process(Cursor::new(html.as_bytes()), &mut output)
676-
.unwrap();
677-
let processed = String::from_utf8_lossy(&output);
678-
let normalized = processed.replace('\\', "");
679-
assert!(
680-
normalized.contains("\"href\":\"https://test.example.com/dashboard\""),
681-
"Should rewrite escaped href sequences inside streamed payloads. Content: {}",
682-
normalized
683-
);
684-
assert!(
685-
normalized.contains("\"href\":\"https://test.example.com/secondary\""),
686-
"Should rewrite plain href attributes inside streamed payloads"
687-
);
688-
assert!(
689-
normalized.contains("\"link\":\"https://test.example.com/api-test\""),
690-
"Should rewrite additional configured attributes like link"
691-
);
692-
assert!(
693-
processed.contains("\"dataHost\":\"https://origin.example.com/api\""),
694-
"Should leave non-href properties untouched"
695-
);
696-
}
697-
698-
#[test]
699-
fn test_nextjs_rewrite_respects_flag() {
700-
let html = r#"<html><body>
701-
<script id="__NEXT_DATA__" type="application/json">
702-
{"props":{"pageProps":{"href":"https://origin.example.com/reviews"}}}
703-
</script>
704-
</body></html>"#;
705-
706-
let config = create_test_config();
707-
let processor = create_html_processor(config);
708-
let pipeline_config = PipelineConfig {
709-
input_compression: Compression::None,
710-
output_compression: Compression::None,
711-
chunk_size: 8192,
712-
};
713-
let mut pipeline = StreamingPipeline::new(pipeline_config, processor);
714-
715-
let mut output = Vec::new();
716-
pipeline
717-
.process(Cursor::new(html.as_bytes()), &mut output)
718-
.unwrap();
719-
let processed = String::from_utf8_lossy(&output);
720-
721-
assert!(
722-
processed.contains("origin.example.com"),
723-
"Should leave Next.js data untouched when disabled"
724-
);
725-
assert!(
726-
!processed.contains("test.example.com/reviews"),
727-
"Should not rewrite Next.js data when flag is off"
728-
);
729-
}
730-
731424
#[test]
732425
fn test_create_html_processor_url_replacement() {
733426
let config = create_test_config();
@@ -774,15 +467,6 @@ mod tests {
774467
assert_eq!(config.origin_host, "origin.test-publisher.com");
775468
assert_eq!(config.request_host, "proxy.example.com");
776469
assert_eq!(config.request_scheme, "https");
777-
assert!(
778-
!config.nextjs_enabled,
779-
"Next.js rewrites should default to disabled"
780-
);
781-
assert_eq!(
782-
config.nextjs_attributes,
783-
vec!["href".to_string(), "link".to_string(), "url".to_string()],
784-
"Should default to rewriting href/link/url attributes"
785-
);
786470
}
787471

788472
#[test]

0 commit comments

Comments
 (0)