@@ -786,7 +786,7 @@ class MathInlineNode extends InlineContentNode {
786786class GlobalTimeNode extends InlineContentNode {
787787 const GlobalTimeNode ({super .debugHtmlNode, required this .datetime});
788788
789- /// Always in UTC, enforced in [_ZulipContentParser .parseInlineContent] .
789+ /// Always in UTC, enforced in [_ZulipInlineContentParser.parseInlineContent].
790790 final DateTime datetime;
791791
792792 @override
@@ -806,72 +806,68 @@ class GlobalTimeNode extends InlineContentNode {
806806
807807////////////////////////////////////////////////////////////////
808808
809- /// What sort of nodes a [_ZulipContentParser] is currently expecting to find.
810- enum _ParserContext {
811- /// The parser is currently looking for block nodes.
812- block,
809+ String ? _parseMath (dom. Element element, { required bool block}) {
810+ final dom. Element katexElement;
811+ if ( ! block) {
812+ assert (element.localName == 'span' && element.className == 'katex' );
813813
814- /// The parser is currently looking for inline nodes.
815- inline,
816- }
817-
818- class _ZulipContentParser {
819- /// The current state of what sort of nodes the parser is looking for.
820- ///
821- /// This exists for the sake of debug-mode checks,
822- /// and should be read or updated only inside an assertion.
823- _ParserContext _debugParserContext = _ParserContext .block;
824-
825- String ? parseMath (dom.Element element, {required bool block}) {
826- assert (block == (_debugParserContext == _ParserContext .block));
827-
828- final dom.Element katexElement;
829- if (! block) {
830- assert (element.localName == 'span' && element.className == 'katex' );
814+ katexElement = element;
815+ } else {
816+ assert (element.localName == 'span' && element.className == 'katex-display' );
831817
832- katexElement = element;
833- } else {
834- assert (element.localName == 'span' && element.className == 'katex-display' );
835-
836- if (element.nodes.length != 1 ) return null ;
837- final child = element.nodes.single;
838- if (child is ! dom.Element ) return null ;
839- if (child.localName != 'span' ) return null ;
840- if (child.className != 'katex' ) return null ;
841- katexElement = child;
842- }
843-
844- // Expect two children span.katex-mathml, span.katex-html .
845- // For now we only care about the .katex-mathml .
846- if (katexElement.nodes.isEmpty) return null ;
847- final child = katexElement.nodes.first;
818+ if (element.nodes.length != 1 ) return null ;
819+ final child = element.nodes.single;
848820 if (child is ! dom.Element ) return null ;
849821 if (child.localName != 'span' ) return null ;
850- if (child.className != 'katex-mathml' ) return null ;
851-
852- if (child.nodes.length != 1 ) return null ;
853- final grandchild = child.nodes.single;
854- if (grandchild is ! dom.Element ) return null ;
855- if (grandchild.localName != 'math' ) return null ;
856- if (grandchild.attributes['display' ] != (block ? 'block' : null )) return null ;
857- if (grandchild.namespaceUri != 'http://www.w3.org/1998/Math/MathML' ) return null ;
858-
859- if (grandchild.nodes.length != 1 ) return null ;
860- final greatgrand = grandchild.nodes.single;
861- if (greatgrand is ! dom.Element ) return null ;
862- if (greatgrand.localName != 'semantics' ) return null ;
863-
864- if (greatgrand.nodes.isEmpty) return null ;
865- final descendant4 = greatgrand.nodes.last;
866- if (descendant4 is ! dom.Element ) return null ;
867- if (descendant4.localName != 'annotation' ) return null ;
868- if (descendant4.attributes['encoding' ] != 'application/x-tex' ) return null ;
869-
870- return descendant4.text.trim ();
822+ if (child.className != 'katex' ) return null ;
823+ katexElement = child;
824+ }
825+
826+ // Expect two children: span.katex-mathml, span.katex-html.
827+ // For now we only care about the .katex-mathml.
828+ if (katexElement.nodes.isEmpty) return null ;
829+ final child = katexElement.nodes.first;
830+ if (child is ! dom.Element ) return null ;
831+ if (child.localName != 'span' ) return null ;
832+ if (child.className != 'katex-mathml' ) return null ;
833+
834+ if (child.nodes.length != 1 ) return null ;
835+ final grandchild = child.nodes.single;
836+ if (grandchild is ! dom.Element ) return null ;
837+ if (grandchild.localName != 'math' ) return null ;
838+ if (grandchild.attributes['display' ] != (block ? 'block' : null )) return null ;
839+ if (grandchild.namespaceUri != 'http://www.w3.org/1998/Math/MathML' ) return null ;
840+
841+ if (grandchild.nodes.length != 1 ) return null ;
842+ final greatgrand = grandchild.nodes.single;
843+ if (greatgrand is ! dom.Element ) return null ;
844+ if (greatgrand.localName != 'semantics' ) return null ;
845+
846+ if (greatgrand.nodes.isEmpty) return null ;
847+ final descendant4 = greatgrand.nodes.last;
848+ if (descendant4 is ! dom.Element ) return null ;
849+ if (descendant4.localName != 'annotation' ) return null ;
850+ if (descendant4.attributes['encoding' ] != 'application/x-tex' ) return null ;
851+
852+ return descendant4.text.trim ();
853+ }
854+
855+ /// Parser for the inline-content subtrees within Zulip content HTML.
856+ ///
857+ /// The only entry point to this class is [parseBlockInline].
858+ ///
859+ /// After a call to [parseBlockInline] returns, the [_ZulipInlineContentParser]
860+ /// instance has been reset to its starting state, and can be re-used for
861+ /// parsing other subtrees.
862+ class _ZulipInlineContentParser {
/// Parses an inline KaTeX element into a [MathInlineNode].
///
/// [element] is expected to be a `span.katex`; that expectation is
/// asserted inside [_parseMath].
///
/// Returns null when [_parseMath] cannot extract the TeX source,
/// leaving the choice of fallback node to the caller.
InlineContentNode? parseInlineMath(dom.Element element) {
  final tex = _parseMath(element, block: false);
  if (tex == null) {
    return null;
  }
  return MathInlineNode(
    texSource: tex,
    // Keep a reference to the source HTML only in debug builds.
    debugHtmlNode: kDebugMode ? element : null,
  );
}
872869
873870 UserMentionNode ? parseUserMention (dom.Element element) {
874- assert (_debugParserContext == _ParserContext .inline);
875871 assert (element.localName == 'span' );
876872 final debugHtmlNode = kDebugMode ? element : null ;
877873
@@ -945,7 +941,6 @@ class _ZulipContentParser {
945941 static final _emojiCodeFromClassNameRegexp = RegExp (r"emoji-([^ ]+)" );
946942
947943 InlineContentNode parseInlineContent (dom.Node node) {
948- assert (_debugParserContext == _ParserContext .inline);
949944 final debugHtmlNode = kDebugMode ? node : null ;
950945 InlineContentNode unimplemented () => UnimplementedInlineContentNode (htmlNode: node);
951946
@@ -1025,36 +1020,49 @@ class _ZulipContentParser {
10251020 }
10261021
10271022 if (localName == 'span' && className == 'katex' ) {
1028- final texSource = parseMath (element, block: false );
1029- if (texSource == null ) return unimplemented ();
1030- return MathInlineNode (texSource: texSource, debugHtmlNode: debugHtmlNode);
1023+ return parseInlineMath (element) ?? unimplemented ();
10311024 }
10321025
10331026 // TODO more types of node
10341027 return unimplemented ();
10351028 }
10361029
/// Parses each of [nodes] with [parseInlineContent].
///
/// The results are returned in the same order as [nodes],
/// as a fixed-length list.
List<InlineContentNode> parseInlineContentList(List<dom.Node> nodes) {
  return List<InlineContentNode>.generate(
    nodes.length,
    (index) => parseInlineContent(nodes[index]),
    growable: false,
  );
}
10411033
/// Parses [nodes], the children of a [BlockInlineContainerNode].
///
/// Together these nodes form one complete subtree of inline content,
/// with no further inline ancestors.
///
/// Returns the parsed inline nodes along with the result of
/// [_takeLinkNodes].
({List<InlineContentNode> nodes, List<LinkNode>? links}) parseBlockInline(List<dom.Node> nodes) {
  // Parse first, then take the links, in that order.
  // NOTE(review): presumably [_takeLinkNodes] hands back links gathered
  // while parsing and resets that state; confirm against its definition.
  final inlineNodes = parseInlineContentList(nodes);
  final collectedLinks = _takeLinkNodes();
  return (nodes: inlineNodes, links: collectedLinks);
}
1040+ }
1041+
1042+ /// Parser for a complete piece of Zulip HTML content, a [ZulipContent].
1043+ ///
1044+ /// The only entry point to this class is [parse].
1045+ class _ZulipContentParser {
1046+ /// The single inline-content parser used and re-used throughout parsing of
1047+ /// a complete piece of Zulip HTML content.
1048+ ///
1049+ /// Because block content can never appear nested inside inline content,
1050+ /// there's never a need for more than one of these at a time,
1051+ /// so we can allocate just one up front.
1052+ final inlineParser = _ZulipInlineContentParser ();
1053+
/// Parses [nodes] as a complete subtree of inline content.
///
/// This simply forwards to [inlineParser], the single inline-content
/// parser owned by this block-level parser.
({List<InlineContentNode> nodes, List<LinkNode>? links}) parseBlockInline(List<dom.Node> nodes) =>
    inlineParser.parseBlockInline(nodes);
1057+
/// Parses a display-math element into a [MathBlockNode].
///
/// [element] is expected to be a `span.katex-display`; that expectation
/// is asserted inside [_parseMath].
///
/// When the TeX source cannot be extracted, the result is instead an
/// [UnimplementedBlockContentNode] wrapping [element].
BlockContentNode parseMathBlock(dom.Element element) {
  final tex = _parseMath(element, block: true);
  return tex == null
      ? UnimplementedBlockContentNode(htmlNode: element)
      : MathBlockNode(
          texSource: tex,
          // Keep a reference to the source HTML only in debug builds.
          debugHtmlNode: kDebugMode ? element : null,
        );
}
10551064
10561065 BlockContentNode parseListNode (dom.Element element) {
1057- assert (_debugParserContext == _ParserContext .block);
10581066 ListStyle ? listStyle;
10591067 switch (element.localName) {
10601068 case 'ol' : listStyle = ListStyle .ordered; break ;
@@ -1077,7 +1085,6 @@ class _ZulipContentParser {
10771085 }
10781086
10791087 BlockContentNode parseSpoilerNode (dom.Element divElement) {
1080- assert (_debugParserContext == _ParserContext .block);
10811088 assert (divElement.localName == 'div'
10821089 && divElement.className == 'spoiler-block' );
10831090
@@ -1097,7 +1104,6 @@ class _ZulipContentParser {
10971104 }
10981105
10991106 BlockContentNode parseCodeBlock (dom.Element divElement) {
1100- assert (_debugParserContext == _ParserContext .block);
11011107 final mainElement = () {
11021108 assert (divElement.localName == 'div'
11031109 && divElement.className == "codehilite" );
@@ -1180,7 +1186,6 @@ class _ZulipContentParser {
11801186 static final _imageDimensionsRegExp = RegExp (r'^(\d+)x(\d+)$' );
11811187
11821188 BlockContentNode parseImageNode (dom.Element divElement) {
1183- assert (_debugParserContext == _ParserContext .block);
11841189 final elements = () {
11851190 assert (divElement.localName == 'div'
11861191 && divElement.className == 'message_inline_image' );
@@ -1272,7 +1277,6 @@ class _ZulipContentParser {
12721277 }();
12731278
12741279 BlockContentNode parseInlineVideoNode (dom.Element divElement) {
1275- assert (_debugParserContext == _ParserContext .block);
12761280 assert (divElement.localName == 'div'
12771281 && _videoClassNameRegexp.hasMatch (divElement.className));
12781282
@@ -1305,7 +1309,6 @@ class _ZulipContentParser {
13051309 }
13061310
13071311 BlockContentNode parseEmbedVideoNode (dom.Element divElement) {
1308- assert (_debugParserContext == _ParserContext .block);
13091312 assert (divElement.localName == 'div'
13101313 && _videoClassNameRegexp.hasMatch (divElement.className));
13111314
@@ -1344,7 +1347,6 @@ class _ZulipContentParser {
13441347 }
13451348
13461349 BlockContentNode parseTableContent (dom.Element tableElement) {
1347- assert (_debugParserContext == _ParserContext .block);
13481350 assert (tableElement.localName == 'table'
13491351 && tableElement.className.isEmpty);
13501352
@@ -1452,7 +1454,6 @@ class _ZulipContentParser {
14521454 }
14531455
14541456 BlockContentNode parseBlockContent (dom.Node node) {
1455- assert (_debugParserContext == _ParserContext .block);
14561457 final debugHtmlNode = kDebugMode ? node : null ;
14571458 if (node is ! dom.Element ) {
14581459 return UnimplementedBlockContentNode (htmlNode: node);
@@ -1480,9 +1481,7 @@ class _ZulipContentParser {
14801481 // The case with the `<br>\n` can happen when at the end of a quote;
14811482 // it seems like a glitch in the server's Markdown processing,
14821483 // so hopefully there just aren't any further such glitches.
1483- final texSource = parseMath (child, block: true );
1484- if (texSource == null ) return UnimplementedBlockContentNode (htmlNode: node);
1485- return MathBlockNode (texSource: texSource, debugHtmlNode: debugHtmlNode);
1484+ return parseMathBlock (child);
14861485 }
14871486 }
14881487 }
@@ -1579,10 +1578,15 @@ class _ZulipContentParser {
15791578 ///
15801579 /// See [ParagraphNode] .
15811580 List <BlockContentNode > parseImplicitParagraphBlockContentList (dom.NodeList nodes) {
1582- assert (_debugParserContext == _ParserContext .block);
15831581 final List <BlockContentNode > result = [];
1584- final List <dom. Node > currentParagraph = [];
1582+
15851583 List <ImageNode > imageNodes = [];
1584+ void consumeImageNodes () {
1585+ result.add (ImageNodeList (imageNodes));
1586+ imageNodes = [];
1587+ }
1588+
1589+ final List <dom.Node > currentParagraph = [];
15861590 void consumeParagraph () {
15871591 final parsed = parseBlockInline (currentParagraph);
15881592 result.add (ParagraphNode (
@@ -1597,8 +1601,7 @@ class _ZulipContentParser {
15971601
15981602 if (_isPossibleInlineNode (node)) {
15991603 if (imageNodes.isNotEmpty) {
1600- result.add (ImageNodeList (imageNodes));
1601- imageNodes = [];
1604+ consumeImageNodes ();
16021605 // In a context where paragraphs are implicit it should be impossible
16031606 // to have more paragraph content after image previews.
16041607 result.add (UnimplementedBlockContentNode (htmlNode: node));
@@ -1613,24 +1616,25 @@ class _ZulipContentParser {
16131616 imageNodes.add (block);
16141617 continue ;
16151618 }
1616- if (imageNodes.isNotEmpty) {
1617- result.add (ImageNodeList (imageNodes));
1618- imageNodes = [];
1619- }
1619+ if (imageNodes.isNotEmpty) consumeImageNodes ();
16201620 result.add (block);
16211621 }
16221622 if (currentParagraph.isNotEmpty) consumeParagraph ();
1623- if (imageNodes.isNotEmpty) result.add (ImageNodeList (imageNodes));
1624-
1623+ if (imageNodes.isNotEmpty) consumeImageNodes ();
16251624 return result;
16261625 }
16271626
16281627 static final _redundantLineBreaksRegexp = RegExp (r'^\n+$' );
16291628
16301629 List <BlockContentNode > parseBlockContentList (dom.NodeList nodes) {
1631- assert (_debugParserContext == _ParserContext .block);
16321630 final List <BlockContentNode > result = [];
1631+
16331632 List <ImageNode > imageNodes = [];
1633+ void consumeImageNodes () {
1634+ result.add (ImageNodeList (imageNodes));
1635+ imageNodes = [];
1636+ }
1637+
16341638 for (final node in nodes) {
16351639 // We get a bunch of newline Text nodes between paragraphs.
16361640 // A browser seems to ignore these; let's do the same.
@@ -1643,13 +1647,10 @@ class _ZulipContentParser {
16431647 imageNodes.add (block);
16441648 continue ;
16451649 }
1646- if (imageNodes.isNotEmpty) {
1647- result.add (ImageNodeList (imageNodes));
1648- imageNodes = [];
1649- }
1650+ if (imageNodes.isNotEmpty) consumeImageNodes ();
16501651 result.add (block);
16511652 }
1652- if (imageNodes.isNotEmpty) result. add ( ImageNodeList (imageNodes) );
1653+ if (imageNodes.isNotEmpty) consumeImageNodes ( );
16531654 return result;
16541655 }
16551656
@@ -1660,6 +1661,8 @@ class _ZulipContentParser {
16601661 }
16611662}
16621663
/// Parses [html], a complete piece of Zulip HTML content
/// (for example an entire value of [Message.content]),
/// into a [ZulipContent].
///
/// A fresh [_ZulipContentParser] is created for each call.
ZulipContent parseContent(String html) {
  final parser = _ZulipContentParser();
  return parser.parse(html);
}
0 commit comments