104
104
rust_2018_idioms,
105
105
rust_2018_compatibility
106
106
) ]
107
- use kuchiki:: { parse_html, traits:: TendrilSink , NodeRef } ;
107
+ use kuchiki:: { parse_html, traits:: TendrilSink , Node , NodeRef , Specificity } ;
108
108
109
109
pub mod error;
110
110
mod parser;
111
111
112
- use cssparser:: CowRcStr ;
113
112
pub use error:: InlineError ;
114
113
use smallvec:: { smallvec, SmallVec } ;
115
114
use std:: {
116
115
borrow:: Cow ,
116
+ collections:: { hash_map:: Entry , HashMap } ,
117
117
fs:: File ,
118
118
io:: { Read , Write } ,
119
+ ops:: Deref ,
119
120
} ;
120
121
pub use url:: { ParseError , Url } ;
121
122
@@ -264,14 +265,27 @@ impl<'a> CSSInliner<'a> {
264
265
#[ inline]
265
266
pub fn inline_to < W : Write > ( & self , html : & str , target : & mut W ) -> Result < ( ) > {
266
267
let document = parse_html ( ) . one ( html) ;
268
+ // CSS rules may overlap, and the final set of rules applied to an element depends on
269
+ // selectors' specificity - selectors with higher specificity have more priority.
270
+ // Inlining happens in two major steps:
271
+ // 1. All available styles are mapped to respective elements together with their
272
+ // selector's specificity. When two rules overlap on the same declaration, then
273
+ // the one with higher specificity (or equal specificity, if it comes later) replaces the other.
274
+ // 2. Resulting styles are merged into existing "style" attributes.
275
+ #[ allow( clippy:: mutable_key_type) ]
276
+ // Each matched element is identified by its raw pointer - the pointers are evaluated once
277
+ // and then reused, which allows O(1) access to find them.
278
+ // Internally, their raw pointers are used to implement `Eq`, which seems like the only
279
+ // reasonable approach to compare them (performance-wise).
280
+ let mut styles = HashMap :: with_capacity ( 128 ) ;
267
281
if self . options . inline_style_tags {
268
282
for style_tag in document
269
283
. select ( "style" )
270
284
. map_err ( |_| error:: InlineError :: ParseError ( Cow :: from ( "Unknown error" ) ) ) ?
271
285
{
272
286
if let Some ( first_child) = style_tag. as_node ( ) . first_child ( ) {
273
287
if let Some ( css_cell) = first_child. as_text ( ) {
274
- process_css ( & document, css_cell. borrow ( ) . as_str ( ) ) ?;
288
+ process_css ( & document, css_cell. borrow ( ) . as_str ( ) , & mut styles ) ?;
275
289
}
276
290
}
277
291
if self . options . remove_style_tags {
@@ -298,12 +312,39 @@ impl<'a> CSSInliner<'a> {
298
312
if !href. is_empty ( ) {
299
313
let url = self . get_full_url ( href) ;
300
314
let css = load_external ( url. as_ref ( ) ) ?;
301
- process_css ( & document, css. as_str ( ) ) ?;
315
+ process_css ( & document, css. as_str ( ) , & mut styles ) ?;
302
316
}
303
317
}
304
318
}
305
319
if let Some ( extra_css) = & self . options . extra_css {
306
- process_css ( & document, extra_css) ?;
320
+ process_css ( & document, extra_css, & mut styles) ?;
321
+ }
322
+ for ( node_id, styles) in styles {
323
+ // SAFETY: All nodes are alive as long as `document` is in scope.
324
+ // Therefore, any `document` children should be alive and it is safe to dereference
325
+ // pointers to them
326
+ let node = unsafe { & * node_id } ;
327
+ // Attributes may already be borrowed if the current selector matches a <link> tag that is
328
+ // already borrowed in `inline_to`. We can ignore such matches
329
+ if let Ok ( mut attributes) = node
330
+ . as_element ( )
331
+ . expect ( "Element is expected" )
332
+ . attributes
333
+ . try_borrow_mut ( )
334
+ {
335
+ if let Some ( existing_style) = attributes. get_mut ( "style" ) {
336
+ * existing_style = merge_styles ( existing_style, & styles) ?
337
+ } else {
338
+ let mut final_styles = String :: with_capacity ( 128 ) ;
339
+ for ( name, ( _, value) ) in styles {
340
+ final_styles. push_str ( name. as_str ( ) ) ;
341
+ final_styles. push ( ':' ) ;
342
+ final_styles. push_str ( value. as_str ( ) ) ;
343
+ final_styles. push ( ';' ) ;
344
+ }
345
+ attributes. insert ( "style" , final_styles) ;
346
+ } ;
347
+ }
307
348
}
308
349
document. serialize ( target) ?;
309
350
Ok ( ( ) )
@@ -342,35 +383,47 @@ fn load_external(url: &str) -> Result<String> {
342
383
}
343
384
}
344
385
345
- fn process_css ( document : & NodeRef , css : & str ) -> Result < ( ) > {
386
+ type NodeId = * const Node ;
387
+
388
+ #[ allow( clippy:: mutable_key_type) ]
389
+ fn process_css (
390
+ document : & NodeRef ,
391
+ css : & str ,
392
+ styles : & mut HashMap < NodeId , HashMap < String , ( Specificity , String ) > > ,
393
+ ) -> Result < ( ) > {
346
394
let mut parse_input = cssparser:: ParserInput :: new ( css) ;
347
395
let mut parser = cssparser:: Parser :: new ( & mut parse_input) ;
348
396
let rule_list =
349
397
cssparser:: RuleListParser :: new_for_stylesheet ( & mut parser, parser:: CSSRuleListParser ) ;
350
- for ( selector, declarations) in rule_list. flatten ( ) {
351
- if let Ok ( matching_elements) = document. select ( selector) {
352
- for matching_element in matching_elements {
353
- // It can be borrowed if the current selector matches <link> tag, that is
354
- // already borrowed in `inline_to`. We can ignore such matches
355
- if let Ok ( mut attributes) = matching_element. attributes . try_borrow_mut ( ) {
356
- if let Some ( existing_style) = attributes. get_mut ( "style" ) {
357
- * existing_style = merge_styles ( existing_style, & declarations) ?
358
- } else {
359
- let mut final_styles = String :: with_capacity ( 64 ) ;
360
- for ( name, value) in & declarations {
361
- final_styles. push_str ( name) ;
362
- final_styles. push ( ':' ) ;
363
- final_styles. push_str ( value) ;
364
- final_styles. push ( ';' ) ;
398
+ for ( selectors, declarations) in rule_list. flatten ( ) {
399
+ // Only CSS Syntax Level 3 is supported, therefore it is OK to split by `,`
400
+ // With `is` or `where` selectors (Level 4) this split should be done on the parser level
401
+ for selector in selectors. split ( ',' ) {
402
+ if let Ok ( matching_elements) = document. select ( selector) {
403
+ // There is always only one selector applied
404
+ let specificity = matching_elements. selectors . 0 [ 0 ] . specificity ( ) ;
405
+ for matching_element in matching_elements {
406
+ let element_styles = styles
407
+ . entry ( matching_element. as_node ( ) . deref ( ) )
408
+ . or_insert_with ( || HashMap :: with_capacity ( 16 ) ) ;
409
+ for ( name, value) in & declarations {
410
+ match element_styles. entry ( name. to_string ( ) ) {
411
+ Entry :: Occupied ( mut entry) => {
412
+ if entry. get ( ) . 0 <= specificity {
413
+ entry. insert ( ( specificity, value. to_string ( ) ) ) ;
414
+ }
415
+ }
416
+ Entry :: Vacant ( entry) => {
417
+ entry. insert ( ( specificity, value. to_string ( ) ) ) ;
418
+ }
365
419
}
366
- attributes. insert ( "style" , final_styles) ;
367
- } ;
420
+ }
368
421
}
369
422
}
423
+ // Skip selectors that can't be parsed
424
+ // Ignore unparsable entries. E.g. there is no parser for @media queries
425
+ // Which means that they will fall into this category and will be ignored
370
426
}
371
- // Skip selectors that can't be parsed
372
- // Ignore not parsable entries. E.g. there is no parser for @media queries
373
- // Which means that they will fall into this category and will be ignored
374
427
}
375
428
Ok ( ( ) )
376
429
}
@@ -394,16 +447,19 @@ pub fn inline_to<W: Write>(html: &str, target: &mut W) -> Result<()> {
394
447
CSSInliner :: default ( ) . inline_to ( html, target)
395
448
}
396
449
397
- fn merge_styles ( existing_style : & str , new_styles : & [ parser:: Declaration < ' _ > ] ) -> Result < String > {
450
+ fn merge_styles (
451
+ existing_style : & str ,
452
+ new_styles : & HashMap < String , ( Specificity , String ) > ,
453
+ ) -> Result < String > {
398
454
// Parse existing declarations in "style" attribute
399
455
let mut input = cssparser:: ParserInput :: new ( existing_style) ;
400
456
let mut parser = cssparser:: Parser :: new ( & mut input) ;
401
457
let declarations =
402
458
cssparser:: DeclarationListParser :: new ( & mut parser, parser:: CSSDeclarationListParser ) ;
403
459
// New rules override old ones and we store property names inline to check the old rules later
404
- let mut buffer: SmallVec < [ & CowRcStr < ' _ > ; 8 ] > = smallvec ! [ ] ;
460
+ let mut buffer: SmallVec < [ & str ; 8 ] > = smallvec ! [ ] ;
405
461
let mut final_styles = String :: with_capacity ( 256 ) ;
406
- for ( property, value) in new_styles {
462
+ for ( property, ( _ , value) ) in new_styles {
407
463
final_styles. push_str ( property) ;
408
464
final_styles. push ( ':' ) ;
409
465
final_styles. push_str ( value) ;
@@ -414,7 +470,7 @@ fn merge_styles(existing_style: &str, new_styles: &[parser::Declaration<'_>]) ->
414
470
for declaration in declarations {
415
471
let ( name, value) = declaration?;
416
472
// Usually this buffer is small and it is faster than checking a {Hash,BTree}Map
417
- if !buffer. contains ( & & name) {
473
+ if !buffer. contains ( & name. as_ref ( ) ) {
418
474
final_styles. push_str ( & name) ;
419
475
final_styles. push ( ':' ) ;
420
476
final_styles. push_str ( value) ;
0 commit comments