@@ -538,8 +538,7 @@ class HtmlParser extends StatelessWidget {
538
538
/// between and among inline elements. It does so by creating a boolean [Context]
539
539
/// and passing it to the [_processInlineWhitespaceRecursive] function.
540
540
static StyledElement _processInlineWhitespace (StyledElement tree) {
541
- final whitespaceParsingContext = Context (false );
542
- tree = _processInlineWhitespaceRecursive (tree, whitespaceParsingContext);
/// The shared [Context] starts out as `false`; the recursive pass treats a
/// `false` value as "do not keep the leading space" for a first text node.
541
+ tree = _processInlineWhitespaceRecursive (tree, Context (false ));
543
542
return tree;
544
543
}
545
544
@@ -548,33 +547,77 @@ class HtmlParser extends StatelessWidget {
548
547
/// to the w3's HTML whitespace processing specification linked to above.
549
548
static StyledElement _processInlineWhitespaceRecursive (
550
549
StyledElement tree,
551
- Context <bool > wpc ,
550
+ Context <bool > keepLeadingSpace ,
552
551
) {
/// Visits [tree] and then each of its children in order, sharing one mutable
/// [Context] between all visits, and returns the same [tree] it was given.
/// Only [TextContentElement] nodes are modified: at most one leading space is
/// removed from their text, and the context is updated for the next text node.
///
/// NOTE(review): the removed (`-`) revision reset the flag to false on entering a
/// Display.BLOCK, image or SVG element; in the added (`+`) lines the flag is
/// written only by the final check of the text branch — confirm that is intended.
553
- if (tree.style.display == Display .BLOCK ) {
554
- wpc.data = false ;
555
- }
556
-
557
- if (tree is ImageContentElement || tree is SvgContentElement ) {
558
- wpc.data = false ;
559
- }
560
-
561
552
if (tree is TextContentElement ) {
562
- int index = - 1 ;
563
- if ((tree.element? .nodes.length ?? 0 ) > 1 ) {
564
- index = tree.element? .nodes.indexWhere ((element) => element == tree.node) ?? - 1 ;
553
+ /// Both indices start at -1 ("not found") so the comparisons below need no null checks.
554
+ int textIndex = - 1 ;
555
+ int elementIndex = - 1 ;
556
+ /// Text that follows this element in its parent's node list. It defaults to a single
557
+ /// space, so the final check below still updates the context when nothing follows.
558
+ String parentAfterText = " " ;
559
+ /// Position of this text node among the nodes of its owning element; stays -1 if absent.
560
+ if ((tree.element? .nodes.length ?? 0 ) >= 1 ) {
561
+ textIndex = tree.element? .nodes.indexWhere ((element) => element == tree.node) ?? - 1 ;
562
+ }
563
+ /// Node list of the owning element's parent; null when there is no element or no parent.
564
+ dom.NodeList ? parentNodes = tree.element? .parent? .nodes;
565
+ /// Position of the owning element among its parent's nodes; stays -1 if absent.
566
+ if ((parentNodes? .length ?? 0 ) >= 1 ) {
567
+ elementIndex = parentNodes? .indexWhere ((element) => element == tree.element) ?? - 1 ;
565
568
}
566
- if (index < 1 && tree.text! .startsWith (' ' )
567
- && tree.element? .localName != "br" ) {
569
+ /// If the owning element has a following sibling in the parent's node list and that
570
+ /// sibling is a [dom.Text], use its text. Otherwise, if a following sibling exists,
571
+ /// descend through first children for as long as the node is a non-empty
572
+ /// [dom.Element], then use the `text` of whichever node the descent stops on.
/// NOTE(review): when elementIndex is still -1 the lookups below read index 0, i.e.
/// the parent's first node (or null when parentNodes is null) — confirm that this
/// fallback is acceptable.
573
+ if (elementIndex < (parentNodes? .length ?? 1 ) - 1 && parentNodes? [elementIndex + 1 ] is dom.Text ) {
574
+ parentAfterText = parentNodes? [elementIndex + 1 ].text ?? " " ;
575
+ } else if (elementIndex < (parentNodes? .length ?? 1 ) - 1 ) {
576
+ var parentAfter = parentNodes? [elementIndex + 1 ];
577
+ while (parentAfter is dom.Element ) {
578
+ if (parentAfter.nodes.isNotEmpty) {
579
+ parentAfter = parentAfter.nodes.first;
580
+ } else {
581
+ break ;
582
+ }
583
+ }
584
+ parentAfterText = parentAfter? .text ?? " " ;
585
+ }
586
+ /// If the text is the first node in its element's node list (or its index is
587
+ /// unknown), it starts with a space, the element isn't a line break, and either
588
+ /// the context says not to keep the space or the element's name is in
589
+ /// [BLOCK_ELEMENTS], remove that one leading space.
590
+ /// We should also delete the leading space at any later point in the node list
591
+ /// if the node just before the text is a <br>, because that tag makes the
592
+ /// element act like a block element.
593
+ if (textIndex < 1
594
+ && tree.text! .startsWith (' ' )
595
+ && tree.element? .localName != "br"
596
+ && (! keepLeadingSpace.data
597
+ || BLOCK_ELEMENTS .contains (tree.element? .localName ?? "" ))
598
+ ) {
599
+ tree.text = tree.text! .replaceFirst (' ' , '' );
600
+ } else if (textIndex >= 1
601
+ && tree.text! .startsWith (' ' )
602
+ && tree.element? .nodes[textIndex - 1 ] is dom.Element
603
+ && (tree.element? .nodes[textIndex - 1 ] as dom.Element ).localName == "br"
604
+ ) {
568
605
tree.text = tree.text! .replaceFirst (' ' , '' );
569
606
}
/// NOTE(review): the removed lines below trimmed trailing whitespace from the last
/// text node; the added lines only record whether the text ends with a space.
570
- if (index == (tree.element? .nodes.length ?? 1 ) - 1
571
- && (tree.text! .endsWith (' ' ) || tree.text! .endsWith ('\n ' ))
572
- && tree.element? .localName != "br" ) {
573
- tree.text = tree.text! .trimRight ();
607
+ /// If the text is the last node in its element's node list, the element isn't
608
+ /// a line break, and the following text starts with a space (or defaulted to " "),
609
+ /// update the [Context] to tell the next text node whether it should keep its
610
+ /// leading space: it is kept only when the current text, as it stands after the
611
+ /// stripping above, does not itself end with a space.
612
+ if (textIndex == (tree.element? .nodes.length ?? 1 ) - 1
613
+ && tree.element? .localName != "br"
614
+ && parentAfterText.startsWith (' ' )
615
+ ) {
616
+ keepLeadingSpace.data = ! tree.text! .endsWith (' ' );
574
617
}
575
618
}
576
619
/// Recurse into every child with the same context object, so a flag set by one
/// text node is visible to whichever node is visited next.
577
- tree.children.forEach ((e) => _processInlineWhitespaceRecursive (e, wpc ));
620
+ tree.children.forEach ((e) => _processInlineWhitespaceRecursive (e, keepLeadingSpace ));
578
621
579
622
return tree;
580
623
}
0 commit comments