22//!
33//! This module provides a StreamProcessor implementation for HTML content.
44use std:: cell:: Cell ;
5- use std:: collections:: BTreeSet ;
65use std:: rc:: Rc ;
76
87use lol_html:: { element, html_content:: ContentType , text, Settings as RewriterSettings } ;
@@ -25,7 +24,6 @@ pub struct HtmlProcessorConfig {
2524 pub integrations : IntegrationRegistry ,
2625 pub nextjs_enabled : bool ,
2726 pub nextjs_attributes : Vec < String > ,
28- pub integration_assets : Vec < String > ,
2927}
3028
3129impl HtmlProcessorConfig {
@@ -37,12 +35,6 @@ impl HtmlProcessorConfig {
3735 request_host : & str ,
3836 request_scheme : & str ,
3937 ) -> Self {
40- let asset_set: BTreeSet < String > = integrations
41- . registered_integrations ( )
42- . into_iter ( )
43- . flat_map ( |meta| meta. assets )
44- . collect ( ) ;
45-
4638 Self {
4739 origin_host : origin_host. to_string ( ) ,
4840 request_host : request_host. to_string ( ) ,
@@ -51,7 +43,6 @@ impl HtmlProcessorConfig {
5143 integrations : integrations. clone ( ) ,
5244 nextjs_enabled : settings. publisher . nextjs . enabled ,
5345 nextjs_attributes : settings. publisher . nextjs . rewrite_attributes . clone ( ) ,
54- integration_assets : asset_set. into_iter ( ) . collect ( ) ,
5546 }
5647 }
5748}
@@ -129,8 +120,6 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
129120 let nextjs_attributes = Rc :: new ( config. nextjs_attributes . clone ( ) ) ;
130121
131122 let injected_tsjs = Rc :: new ( Cell :: new ( false ) ) ;
132- let integration_assets = Rc :: new ( config. integration_assets . clone ( ) ) ;
133- let injected_assets = Rc :: new ( Cell :: new ( false ) ) ;
134123 let integration_registry = config. integrations . clone ( ) ;
135124 let script_rewriters = integration_registry. script_rewriters ( ) ;
136125
@@ -145,27 +134,19 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
145134 }
146135
147136 let mut element_content_handlers = vec ! [
137+ // Inject unified tsjs bundle once at the start of <head>
148138 element!( "head" , {
149139 let injected_tsjs = injected_tsjs. clone( ) ;
150- let integration_assets = integration_assets. clone( ) ;
151- let injected_assets = injected_assets. clone( ) ;
152140 move |el| {
153141 if !injected_tsjs. get( ) {
154- let loader = tsjs:: core_script_tag ( ) ;
142+ let loader = tsjs:: unified_script_tag ( ) ;
155143 el. prepend( & loader, ContentType :: Html ) ;
156144 injected_tsjs. set( true ) ;
157145 }
158- if !integration_assets. is_empty( ) && !injected_assets. get( ) {
159- for asset in integration_assets. iter( ) {
160- let attrs = format!( "async data-tsjs-integration=\" {}\" " , asset) ;
161- let tag = tsjs:: integration_script_tag( asset, & attrs) ;
162- el. append( & tag, ContentType :: Html ) ;
163- }
164- injected_assets. set( true ) ;
165- }
166146 Ok ( ( ) )
167147 }
168148 } ) ,
149+ // Replace URLs in href attributes
169150 element!( "[href]" , {
170151 let patterns = patterns. clone( ) ;
171152 let rewrite_prebid = config. enable_prebid;
@@ -174,7 +155,7 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
174155 if let Some ( mut href) = el. get_attribute( "href" ) {
175156 let original_href = href. clone( ) ;
176157 if rewrite_prebid && is_prebid_script_url( & href) {
177- href = tsjs :: ext_script_src ( ) ;
158+ el . remove ( ) ;
178159 } else {
179160 let new_href = href
180161 . replace( & patterns. https_origin( ) , & patterns. replacement_url( ) )
@@ -204,6 +185,7 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
204185 Ok ( ( ) )
205186 }
206187 } ) ,
188+ // Replace URLs in src attributes
207189 element!( "[src]" , {
208190 let patterns = patterns. clone( ) ;
209191 let rewrite_prebid = config. enable_prebid;
@@ -212,7 +194,7 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
212194 if let Some ( mut src) = el. get_attribute( "src" ) {
213195 let original_src = src. clone( ) ;
214196 if rewrite_prebid && is_prebid_script_url( & src) {
215- src = tsjs :: ext_script_src ( ) ;
197+ el . remove ( ) ;
216198 } else {
217199 let new_src = src
218200 . replace( & patterns. https_origin( ) , & patterns. replacement_url( ) )
@@ -242,6 +224,7 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
242224 Ok ( ( ) )
243225 }
244226 } ) ,
227+ // Replace URLs in action attributes
245228 element!( "[action]" , {
246229 let patterns = patterns. clone( ) ;
247230 let integrations = integration_registry. clone( ) ;
@@ -275,6 +258,7 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
275258 Ok ( ( ) )
276259 }
277260 } ) ,
261+ // Replace URLs in srcset attributes (for responsive images)
278262 element!( "[srcset]" , {
279263 let patterns = patterns. clone( ) ;
280264 let integrations = integration_registry. clone( ) ;
@@ -313,6 +297,7 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
313297 Ok ( ( ) )
314298 }
315299 } ) ,
300+ // Replace URLs in imagesrcset attributes (for link preload)
316301 element!( "[imagesrcset]" , {
317302 let patterns = patterns. clone( ) ;
318303 let integrations = integration_registry. clone( ) ;
@@ -405,28 +390,6 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
405390
406391 let rewriter_settings = RewriterSettings {
407392 element_content_handlers,
408-
409- // TODO: Consider adding text content replacement if needed with settings
410- // // Replace URLs in text content
411- // document_content_handlers: vec![lol_html::doc_text!({
412- // move |text| {
413- // let content = text.as_str();
414-
415- // // Apply URL replacements
416- // let mut new_content = content.to_string();
417- // for replacement in replacer.replacements.iter() {
418- // if new_content.contains(&replacement.find) {
419- // new_content = new_content.replace(&replacement.find, &replacement.replace_with);
420- // }
421- // }
422-
423- // if new_content != content {
424- // text.replace(&new_content, lol_html::html_content::ContentType::Text);
425- // }
426-
427- // Ok(())
428- // }
429- // })],
430393 ..RewriterSettings :: default ( )
431394 } ;
432395
@@ -437,24 +400,8 @@ pub fn create_html_processor(config: HtmlProcessorConfig) -> impl StreamProcesso
437400mod tests {
438401 use super :: * ;
439402 use crate :: streaming_processor:: { Compression , PipelineConfig , StreamingPipeline } ;
440- use crate :: tsjs;
441403 use std:: io:: Cursor ;
442404
443- const MOCK_TESTLIGHT_SRC : & str = "https://mock.testassets/testlight.js" ;
444-
445- struct MockBundleGuard ;
446-
447- fn mock_testlight_bundle ( ) -> MockBundleGuard {
448- tsjs:: mock_integration_bundle ( "testlight" , MOCK_TESTLIGHT_SRC ) ;
449- MockBundleGuard
450- }
451-
452- impl Drop for MockBundleGuard {
453- fn drop ( & mut self ) {
454- tsjs:: clear_mock_integration_bundles ( ) ;
455- }
456- }
457-
458405 fn create_test_config ( ) -> HtmlProcessorConfig {
459406 HtmlProcessorConfig {
460407 origin_host : "origin.example.com" . to_string ( ) ,
@@ -464,19 +411,18 @@ mod tests {
464411 integrations : IntegrationRegistry :: default ( ) ,
465412 nextjs_enabled : false ,
466413 nextjs_attributes : vec ! [ "href" . to_string( ) , "link" . to_string( ) , "url" . to_string( ) ] ,
467- integration_assets : Vec :: new ( ) ,
468414 }
469415 }
470416
471417 #[ test]
472- fn test_injects_tsjs_script_and_rewrites_prebid_refs ( ) {
418+ fn test_injects_unified_bundle_and_removes_prebid_refs ( ) {
473419 let html = r#"<html><head>
474420 <script src="/js/prebid.min.js"></script>
475421 <link rel="preload" as="script" href="https://cdn.prebid.org/prebid.js" />
476422 </head><body></body></html>"# ;
477423
478424 let mut config = create_test_config ( ) ;
479- config. enable_prebid = true ; // enable rewriting of Prebid URLs
425+ config. enable_prebid = true ; // enable removal of Prebid URLs
480426 let processor = create_html_processor ( config) ;
481427 let pipeline_config = PipelineConfig {
482428 input_compression : Compression :: None ,
@@ -489,19 +435,20 @@ mod tests {
489435 let result = pipeline. process ( Cursor :: new ( html. as_bytes ( ) ) , & mut output) ;
490436 assert ! ( result. is_ok( ) ) ;
491437 let processed = String :: from_utf8_lossy ( & output) ;
492- assert ! ( processed. contains( "/static/tsjs=tsjs-core.min.js" ) ) ;
493- // Prebid references are rewritten to our extension when auto-configure is on
494- assert ! ( processed. contains( "/static/tsjs=tsjs-ext.min.js" ) ) ;
438+ assert ! ( processed. contains( "/static/tsjs=tsjs-unified.min.js" ) ) ;
439+ // Prebid script references should be removed when auto-configure is on
440+ assert ! ( !processed. contains( "prebid.min.js" ) ) ;
441+ assert ! ( !processed. contains( "cdn.prebid.org/prebid.js" ) ) ;
495442 }
496443
497444 #[ test]
498- fn test_injects_tsjs_script_and_rewrites_prebid_with_query_string ( ) {
445+ fn test_injects_unified_bundle_and_removes_prebid_with_query_string ( ) {
499446 let html = r#"<html><head>
500447 <script src="/wp-content/plugins/prebidjs/js/prebidjs.min.js?v=1.2.3"></script>
501448 </head><body></body></html>"# ;
502449
503450 let mut config = create_test_config ( ) ;
504- config. enable_prebid = true ; // enable rewriting of Prebid URLs
451+ config. enable_prebid = true ; // enable removal of Prebid URLs
505452 let processor = create_html_processor ( config) ;
506453 let pipeline_config = PipelineConfig {
507454 input_compression : Compression :: None ,
@@ -514,19 +461,21 @@ mod tests {
514461 let result = pipeline. process ( Cursor :: new ( html. as_bytes ( ) ) , & mut output) ;
515462 assert ! ( result. is_ok( ) ) ;
516463 let processed = String :: from_utf8_lossy ( & output) ;
517- assert ! ( processed. contains( "/static/tsjs=tsjs-core.min.js" ) ) ;
518- assert ! ( processed. contains( "/static/tsjs=tsjs-ext.min.js" ) ) ;
464+ // Should inject unified bundle
465+ assert ! ( processed. contains( "/static/tsjs=tsjs-unified.min.js" ) ) ;
466+ // Prebid script should be removed
467+ assert ! ( !processed. contains( "prebidjs.min.js" ) ) ;
519468 }
520469
521470 #[ test]
522- fn test_always_injects_tsjs_script ( ) {
471+ fn test_always_injects_unified_bundle ( ) {
523472 let html = r#"<html><head>
524473 <script src="/js/prebid.min.js"></script>
525474 <link rel="preload" as="script" href="https://cdn.prebid.org/prebid.js" />
526475 </head><body></body></html>"# ;
527476
528477 let mut config = create_test_config ( ) ;
529- config. enable_prebid = false ; // No longer affects tsjs injection
478+ config. enable_prebid = false ; // When disabled, don't remove Prebid scripts
530479 let processor = create_html_processor ( config) ;
531480 let pipeline_config = PipelineConfig {
532481 input_compression : Compression :: None ,
@@ -539,10 +488,11 @@ mod tests {
539488 let result = pipeline. process ( Cursor :: new ( html. as_bytes ( ) ) , & mut output) ;
540489 assert ! ( result. is_ok( ) ) ;
541490 let processed = String :: from_utf8_lossy ( & output) ;
542- // When auto-configure is disabled, do not rewrite Prebid references
491+ // When auto-configure is disabled, do not remove Prebid references
543492 assert ! ( processed. contains( "/js/prebid.min.js" ) ) ;
544493 assert ! ( processed. contains( "cdn.prebid.org/prebid.js" ) ) ;
545- assert ! ( processed. contains( "/static/tsjs=tsjs-core.min.js" ) ) ;
494+ // But still inject unified bundle
495+ assert ! ( processed. contains( "/static/tsjs=tsjs-unified.min.js" ) ) ;
546496 }
547497
548498 #[ test]
@@ -712,10 +662,10 @@ mod tests {
712662 use crate :: test_support:: tests:: create_test_settings;
713663
714664 let settings = create_test_settings ( ) ;
715- let registry = IntegrationRegistry :: new ( & settings ) ;
665+ let integrations = IntegrationRegistry :: default ( ) ;
716666 let config = HtmlProcessorConfig :: from_settings (
717667 & settings,
718- & registry ,
668+ & integrations ,
719669 "origin.test-publisher.com" ,
720670 "proxy.example.com" ,
721671 "https" ,
@@ -808,58 +758,6 @@ mod tests {
808758 ) ;
809759 }
810760
811- #[ test]
812- fn test_integration_registry_rewrites_integration_scripts ( ) {
813- use serde_json:: json;
814-
815- let html = r#"<html><head>
816- <script src="https://cdn.testlight.com/v1/testlight.js"></script>
817- </head><body></body></html>"# ;
818-
819- let _bundle_guard = mock_testlight_bundle ( ) ;
820- let mut settings = Settings :: default ( ) ;
821- let shim_src = tsjs:: integration_script_src ( "testlight" ) ;
822- settings
823- . integrations
824- . insert_config (
825- "testlight" ,
826- & json ! ( {
827- "enabled" : true ,
828- "endpoint" : "https://example.com/openrtb2/auction" ,
829- "rewrite_scripts" : true ,
830- "shim_src" : shim_src,
831- } ) ,
832- )
833- . expect ( "should insert testlight config" ) ;
834-
835- let registry = IntegrationRegistry :: new ( & settings) ;
836- let mut config = create_test_config ( ) ;
837- config. integrations = registry;
838-
839- let processor = create_html_processor ( config) ;
840- let pipeline_config = PipelineConfig {
841- input_compression : Compression :: None ,
842- output_compression : Compression :: None ,
843- chunk_size : 8192 ,
844- } ;
845- let mut pipeline = StreamingPipeline :: new ( pipeline_config, processor) ;
846-
847- let mut output = Vec :: new ( ) ;
848- let result = pipeline. process ( Cursor :: new ( html. as_bytes ( ) ) , & mut output) ;
849- assert ! ( result. is_ok( ) ) ;
850-
851- let processed = String :: from_utf8_lossy ( & output) ;
852- let expected_src = tsjs:: integration_script_src ( "testlight" ) ;
853- assert ! (
854- processed. contains( & expected_src) ,
855- "Integration shim should replace integration script reference"
856- ) ;
857- assert ! (
858- !processed. contains( "cdn.testlight.com" ) ,
859- "Original integration URL should be removed"
860- ) ;
861- }
862-
863761 #[ test]
864762 fn test_real_publisher_html_with_gzip ( ) {
865763 use flate2:: read:: GzDecoder ;
0 commit comments