Skip to content

Commit 97d0d1c

Browse files
committed
Merge branch 'feature/standardize-integrations' into feature/refactor-prebid-integration
2 parents d1deffb + 4b9aaab commit 97d0d1c

File tree

11 files changed

337 additions & 1114 deletions

11 files changed

+337
-1114
lines changed

crates/common/src/html_processor.rs

Lines changed: 166 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,8 @@ use lol_html::{element, html_content::ContentType, text, Settings as RewriterSet
88
use regex::Regex;
99

1010
use crate::integrations::{
11-
IntegrationAttributeContext, IntegrationRegistry, IntegrationScriptContext,
11+
AttributeRewriteOutcome, IntegrationAttributeContext, IntegrationRegistry,
12+
IntegrationScriptContext, ScriptRewriteAction,
1213
};
1314
use crate::settings::Settings;
1415
use crate::streaming_processor::{HtmlRewriterAdapter, StreamProcessor};
@@ -148,7 +149,7 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
148149
href = new_href;
149150
}
150151

151-
if let Some(integration_href) = integrations.rewrite_attribute(
152+
match integrations.rewrite_attribute(
152153
"href",
153154
&href,
154155
&IntegrationAttributeContext {
@@ -158,7 +159,14 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
158159
origin_host: &patterns.origin_host,
159160
},
160161
) {
161-
href = integration_href;
162+
AttributeRewriteOutcome::Unchanged => {}
163+
AttributeRewriteOutcome::Replaced(integration_href) => {
164+
href = integration_href;
165+
}
166+
AttributeRewriteOutcome::RemoveElement => {
167+
el.remove();
168+
return Ok(());
169+
}
162170
}
163171

164172
if href != original_href {
@@ -181,8 +189,7 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
181189
if new_src != src {
182190
src = new_src;
183191
}
184-
185-
if let Some(integration_src) = integrations.rewrite_attribute(
192+
match integrations.rewrite_attribute(
186193
"src",
187194
&src,
188195
&IntegrationAttributeContext {
@@ -192,7 +199,14 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
192199
origin_host: &patterns.origin_host,
193200
},
194201
) {
195-
src = integration_src;
202+
AttributeRewriteOutcome::Unchanged => {}
203+
AttributeRewriteOutcome::Replaced(integration_src) => {
204+
src = integration_src;
205+
}
206+
AttributeRewriteOutcome::RemoveElement => {
207+
el.remove();
208+
return Ok(());
209+
}
196210
}
197211

198212
if src != original_src {
@@ -216,7 +230,7 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
216230
action = new_action;
217231
}
218232

219-
if let Some(integration_action) = integrations.rewrite_attribute(
233+
match integrations.rewrite_attribute(
220234
"action",
221235
&action,
222236
&IntegrationAttributeContext {
@@ -226,7 +240,14 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
226240
origin_host: &patterns.origin_host,
227241
},
228242
) {
229-
action = integration_action;
243+
AttributeRewriteOutcome::Unchanged => {}
244+
AttributeRewriteOutcome::Replaced(integration_action) => {
245+
action = integration_action;
246+
}
247+
AttributeRewriteOutcome::RemoveElement => {
248+
el.remove();
249+
return Ok(());
250+
}
230251
}
231252

232253
if action != original_action {
@@ -255,7 +276,7 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
255276
srcset = new_srcset;
256277
}
257278

258-
if let Some(integration_srcset) = integrations.rewrite_attribute(
279+
match integrations.rewrite_attribute(
259280
"srcset",
260281
&srcset,
261282
&IntegrationAttributeContext {
@@ -265,7 +286,14 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
265286
origin_host: &patterns.origin_host,
266287
},
267288
) {
268-
srcset = integration_srcset;
289+
AttributeRewriteOutcome::Unchanged => {}
290+
AttributeRewriteOutcome::Replaced(integration_srcset) => {
291+
srcset = integration_srcset;
292+
}
293+
AttributeRewriteOutcome::RemoveElement => {
294+
el.remove();
295+
return Ok(());
296+
}
269297
}
270298

271299
if srcset != original_srcset {
@@ -293,7 +321,7 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
293321
imagesrcset = new_imagesrcset;
294322
}
295323

296-
if let Some(integration_imagesrcset) = integrations.rewrite_attribute(
324+
match integrations.rewrite_attribute(
297325
"imagesrcset",
298326
&imagesrcset,
299327
&IntegrationAttributeContext {
@@ -303,7 +331,14 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
303331
origin_host: &patterns.origin_host,
304332
},
305333
) {
306-
imagesrcset = integration_imagesrcset;
334+
AttributeRewriteOutcome::Unchanged => {}
335+
AttributeRewriteOutcome::Replaced(integration_imagesrcset) => {
336+
imagesrcset = integration_imagesrcset;
337+
}
338+
AttributeRewriteOutcome::RemoveElement => {
339+
el.remove();
340+
return Ok(());
341+
}
307342
}
308343

309344
if imagesrcset != original_imagesrcset {
@@ -329,8 +364,14 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
329364
request_scheme: &patterns.request_scheme,
330365
origin_host: &patterns.origin_host,
331366
};
332-
if let Some(rewritten) = rewriter.rewrite(text.as_str(), &ctx) {
333-
text.replace(&rewritten, ContentType::Text);
367+
match rewriter.rewrite(text.as_str(), &ctx) {
368+
ScriptRewriteAction::Keep => {}
369+
ScriptRewriteAction::Replace(rewritten) => {
370+
text.replace(&rewritten, ContentType::Text);
371+
}
372+
ScriptRewriteAction::RemoveNode => {
373+
text.remove();
374+
}
334375
}
335376
Ok(())
336377
}
@@ -377,26 +418,14 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
377418
#[cfg(test)]
378419
mod tests {
379420
use super::*;
421+
use crate::integrations::{
422+
AttributeRewriteAction, IntegrationAttributeContext, IntegrationAttributeRewriter,
423+
};
380424
use crate::streaming_processor::{Compression, PipelineConfig, StreamingPipeline};
381425
use crate::test_support::tests::create_test_settings;
382-
use crate::tsjs;
383426
use serde_json::json;
384427
use std::io::Cursor;
385-
386-
const MOCK_TESTLIGHT_SRC: &str = "https://mock.testassets/testlight.js";
387-
388-
struct MockBundleGuard;
389-
390-
fn mock_testlight_bundle() -> MockBundleGuard {
391-
tsjs::mock_integration_bundle("testlight", MOCK_TESTLIGHT_SRC);
392-
MockBundleGuard
393-
}
394-
395-
impl Drop for MockBundleGuard {
396-
fn drop(&mut self) {
397-
tsjs::clear_mock_integration_bundles();
398-
}
399-
}
428+
use std::sync::Arc;
400429

401430
fn create_test_config() -> HtmlProcessorConfig {
402431
HtmlProcessorConfig {
@@ -406,7 +435,6 @@ mod tests {
406435
integrations: IntegrationRegistry::default(),
407436
nextjs_enabled: false,
408437
nextjs_attributes: vec!["href".to_string(), "link".to_string(), "url".to_string()],
409-
integration_assets: Vec::new(),
410438
}
411439
}
412440

@@ -462,7 +490,111 @@ mod tests {
462490
// When auto-configure is disabled, do not rewrite Prebid references
463491
assert!(processed.contains("/js/prebid.min.js"));
464492
assert!(processed.contains("cdn.prebid.org/prebid.js"));
465-
assert!(processed.contains("/static/tsjs=tsjs-core.min.js"));
493+
assert!(processed.contains("tsjs-unified"));
494+
}
495+
496+
#[test]
497+
fn prebid_auto_config_removes_prebid_scripts() {
498+
let html = r#"<html><head>
499+
<script src="https://cdn.prebid.org/prebid.min.js"></script>
500+
<link rel="preload" as="script" href="https://cdn.prebid.org/prebid.js" />
501+
</head><body></body></html>"#;
502+
503+
let mut settings = create_test_settings();
504+
settings
505+
.integrations
506+
.insert_config(
507+
"prebid",
508+
&json!({
509+
"enabled": true,
510+
"server_url": "https://test-prebid.com/openrtb2/auction",
511+
"timeout_ms": 1000,
512+
"bidders": ["mocktioneer"],
513+
"auto_configure": true,
514+
"debug": false
515+
}),
516+
)
517+
.expect("should update prebid config");
518+
let registry = IntegrationRegistry::new(&settings);
519+
let config = config_from_settings(&settings, &registry);
520+
let processor = create_html_processor(config);
521+
let pipeline_config = PipelineConfig {
522+
input_compression: Compression::None,
523+
output_compression: Compression::None,
524+
chunk_size: 8192,
525+
};
526+
let mut pipeline = StreamingPipeline::new(pipeline_config, processor);
527+
528+
let mut output = Vec::new();
529+
let result = pipeline.process(Cursor::new(html.as_bytes()), &mut output);
530+
assert!(result.is_ok());
531+
let processed = String::from_utf8_lossy(&output);
532+
assert!(
533+
processed.contains("tsjs-unified"),
534+
"Unified bundle should be injected"
535+
);
536+
assert!(
537+
!processed.contains("prebid.min.js"),
538+
"Prebid script should be removed"
539+
);
540+
assert!(
541+
!processed.contains("cdn.prebid.org/prebid.js"),
542+
"Prebid preload should be removed"
543+
);
544+
}
545+
546+
#[test]
547+
fn integration_attribute_rewriter_can_remove_elements() {
548+
struct RemovingLinkRewriter;
549+
550+
impl IntegrationAttributeRewriter for RemovingLinkRewriter {
551+
fn integration_id(&self) -> &'static str {
552+
"removing"
553+
}
554+
555+
fn handles_attribute(&self, attribute: &str) -> bool {
556+
attribute == "href"
557+
}
558+
559+
fn rewrite(
560+
&self,
561+
_attr_name: &str,
562+
attr_value: &str,
563+
_ctx: &IntegrationAttributeContext<'_>,
564+
) -> AttributeRewriteAction {
565+
if attr_value.contains("remove-me") {
566+
AttributeRewriteAction::remove_element()
567+
} else {
568+
AttributeRewriteAction::keep()
569+
}
570+
}
571+
}
572+
573+
let html = r#"<html><body>
574+
<a href="https://origin.example.com/remove-me">remove</a>
575+
<a href="https://origin.example.com/keep-me">keep</a>
576+
</body></html>"#;
577+
578+
let mut config = create_test_config();
579+
config.integrations =
580+
IntegrationRegistry::from_rewriters(vec![Arc::new(RemovingLinkRewriter)], Vec::new());
581+
582+
let processor = create_html_processor(config);
583+
let pipeline_config = PipelineConfig {
584+
input_compression: Compression::None,
585+
output_compression: Compression::None,
586+
chunk_size: 8192,
587+
};
588+
let mut pipeline = StreamingPipeline::new(pipeline_config, processor);
589+
590+
let mut output = Vec::new();
591+
pipeline
592+
.process(Cursor::new(html.as_bytes()), &mut output)
593+
.unwrap();
594+
let processed = String::from_utf8(output).unwrap();
595+
596+
assert!(processed.contains("keep-me"));
597+
assert!(!processed.contains("remove-me"));
466598
}
467599

468600
#[test]
@@ -726,15 +858,12 @@ mod tests {
726858

727859
#[test]
728860
fn test_integration_registry_rewrites_integration_scripts() {
729-
use serde_json::json;
730-
731861
let html = r#"<html><head>
732862
<script src="https://cdn.testlight.com/v1/testlight.js"></script>
733863
</head><body></body></html>"#;
734864

735-
let _bundle_guard = mock_testlight_bundle();
736865
let mut settings = Settings::default();
737-
let shim_src = tsjs::integration_script_src("testlight");
866+
let shim_src = "https://edge.example.com/static/testlight.js".to_string();
738867
settings
739868
.integrations
740869
.insert_config(
@@ -765,9 +894,8 @@ mod tests {
765894
assert!(result.is_ok());
766895

767896
let processed = String::from_utf8_lossy(&output);
768-
let expected_src = tsjs::integration_script_src("testlight");
769897
assert!(
770-
processed.contains(&expected_src),
898+
processed.contains(&shim_src),
771899
"Integration shim should replace integration script reference"
772900
);
773901
assert!(
@@ -968,4 +1096,3 @@ mod tests {
9681096
);
9691097
}
9701098
}
971-

0 commit comments

Comments
 (0)