@@ -374,26 +374,51 @@ protected function extractFootnotes(array $lines): void
374374 $ label = $ matches [1 ];
375375 $ content = $ matches [2 ];
376376
377- // Collect continuation lines (indented)
378- $ contentLines = [$ content ];
377+ // Determine base indentation (2 spaces for footnotes)
378+ $ baseIndent = 2 ;
379+
380+ // Collect continuation lines (indented or blank)
381+ $ contentLines = [];
382+ if (trim ($ content ) !== '' ) {
383+ $ contentLines [] = $ content ;
384+ }
379385 $ j = $ i + 1 ;
386+ $ hasContent = false ;
380387 while ($ j < $ count ) {
381388 $ nextLine = $ lines [$ j ];
382389 if ($ this ->isBlankLine ($ nextLine )) {
390+ // Add blank line to preserve structure
391+ $ contentLines [] = '' ;
383392 $ j ++;
384393
385394 continue ;
386395 }
387- if (preg_match ('/^\s+(.+)$/ ' , $ nextLine , $ contMatch )) {
396+ // Check if line has at least base indentation
397+ if (preg_match ('/^[ ]{ ' . $ baseIndent . '}(.*)$/ ' , $ nextLine , $ contMatch )) {
388398 $ contentLines [] = $ contMatch [1 ];
399+ $ hasContent = true ;
400+ $ j ++;
401+ } elseif (!$ hasContent && preg_match ('/^\s+(.+)$/ ' , $ nextLine , $ contMatch )) {
402+ // Allow flexible indentation for first content line
403+ $ contentLines [] = $ contMatch [1 ];
404+ $ hasContent = true ;
389405 $ j ++;
390406 } else {
391407 break ;
392408 }
393409 }
394410
411+ // Remove trailing blank lines
412+ $ lineCount = count ($ contentLines );
413+ while ($ lineCount > 0 && $ contentLines [$ lineCount - 1 ] === '' ) {
414+ array_pop ($ contentLines );
415+ $ lineCount --;
416+ }
417+
395418 $ footnote = new Footnote ($ label );
396- $ this ->parseBlocks ($ footnote , $ contentLines , 0 );
419+ if ($ contentLines ) {
420+ $ this ->parseBlocks ($ footnote , $ contentLines , 0 );
421+ }
397422 $ this ->footnotes [$ label ] = $ footnote ;
398423 }
399424
@@ -632,6 +657,12 @@ protected function tryParseCodeBlock(Node $parent, array $lines, int $start): ?i
632657 {
633658 $ line = $ lines [$ start ];
634659
660+ // Fast early exit: code blocks start with ` or ~ (possibly after whitespace)
661+ $ trimmed = ltrim ($ line );
662+ if ($ trimmed === '' || ($ trimmed [0 ] !== '` ' && $ trimmed [0 ] !== '~ ' )) {
663+ return null ;
664+ }
665+
635666 // Match opening fence: 3+ backticks or tildes, optionally with leading whitespace
636667 if (!preg_match ('/^(\s*)(`{3,}|~{3,})(.*)$/ ' , $ line , $ matches )) {
637668 return null ;
@@ -710,6 +741,11 @@ protected function tryParseComment(Node $parent, array $lines, int $start): ?int
710741 {
711742 $ line = $ lines [$ start ];
712743
744+ // Fast early exit: comments must contain {%
745+ if (!str_contains ($ line , '{% ' )) {
746+ return null ;
747+ }
748+
713749 // Match comment opening: {%
714750 if (!str_starts_with (trim ($ line ), '{% ' )) {
715751 return null ;
@@ -779,6 +815,11 @@ protected function tryParseRawBlock(Node $parent, array $lines, int $start): ?in
779815 {
780816 $ line = $ lines [$ start ];
781817
818+ // Fast early exit: raw blocks start with ` and contain =
819+ if (!isset ($ line [0 ]) || $ line [0 ] !== '` ' || !str_contains ($ line , '= ' )) {
820+ return null ;
821+ }
822+
782823 // Match opening fence with =format: ``` =html or ```=html
783824 if (!preg_match ('/^(`{3,})\s+=(\w+)\s*$/ ' , $ line , $ matches )) {
784825 return null ;
@@ -828,6 +869,11 @@ protected function tryParseDiv(Node $parent, array $lines, int $start): ?int
828869 {
829870 $ line = $ lines [$ start ];
830871
872+ // Fast early exit: divs start with :
873+ if (!isset ($ line [0 ]) || $ line [0 ] !== ': ' ) {
874+ return null ;
875+ }
876+
831877 // Match opening fence: 3+ colons with optional class
832878 if (!preg_match ('/^(:{3,})\s*(.*)$/ ' , $ line , $ matches )) {
833879 return null ;
@@ -918,22 +964,45 @@ protected function tryParseHeading(Node $parent, array $lines, int $start): ?int
918964 {
919965 $ line = $ lines [$ start ];
920966
921- // Match heading: 1-6 # characters followed by space
922- if (!preg_match ('/^(#{1,6})\s+(.+)$/ ' , $ line , $ matches )) {
967+ // Fast early exit: headings start with # (possibly after up to 3 spaces)
968+ $ trimmed = ltrim ($ line , ' ' );
969+ if (!isset ($ trimmed [0 ]) || $ trimmed [0 ] !== '# ' ) {
970+ return null ;
971+ }
972+
973+ // Match heading: optional leading spaces, 1-6 # characters, optionally followed by space and content
974+ // Can be: "## Heading", "##", " ## Heading", "##\n", etc.
975+ if (!preg_match ('/^[ ]{0,3}(#{1,6})(?:\s+(.*))?$/ ' , $ line , $ matches )) {
923976 return null ;
924977 }
925978
926979 $ level = strlen ($ matches [1 ]);
927- $ content = $ matches [2 ];
980+ $ content = isset ( $ matches [2 ]) ? trim ( $ matches [ 2 ]) : '' ;
928981
929- // Collect continuation lines (lines starting with same # or plain text)
982+ // Collect continuation lines
930983 $ i = $ start + 1 ;
931984 $ count = count ($ lines );
932985 while ($ i < $ count ) {
933986 $ nextLine = $ lines [$ i ];
934- // Check for continuation with # prefix
935- if (preg_match ('/^#{1, ' . $ level . '}\s+(.+)$/ ' , $ nextLine , $ contMatch )) {
936- $ content .= ' ' . $ contMatch [1 ];
987+
988+ // Empty line ends the heading
989+ if ($ this ->isBlankLine ($ nextLine )) {
990+ break ;
991+ }
992+
993+ // Check for continuation with # prefix (same level or less)
994+ if (preg_match ('/^[ ]{0,3}#{1, ' . $ level . '}\s+(.+)$/ ' , $ nextLine , $ contMatch )) {
995+ if ($ content !== '' ) {
996+ $ content .= "\n" ;
997+ }
998+ $ content .= $ contMatch [1 ];
999+ $ i ++;
1000+ } elseif (!$ this ->startsNewBlock ($ nextLine )) {
1001+ // "Lazy" continuation - plain text continues the heading
1002+ if ($ content !== '' ) {
1003+ $ content .= "\n" ;
1004+ }
1005+ $ content .= $ nextLine ;
9371006 $ i ++;
9381007 } else {
9391008 break ;
@@ -987,6 +1056,11 @@ protected function tryParseBlockQuote(Node $parent, array $lines, int $start): ?
9871056 {
9881057 $ line = $ lines [$ start ];
9891058
1059+ // Fast early exit: block quotes start with >
1060+ if (!isset ($ line [0 ]) || $ line [0 ] !== '> ' ) {
1061+ return null ;
1062+ }
1063+
9901064 // Match block quote: > followed by space or end of line (NOT >text or >>)
9911065 // The > must be followed by a space or be at end of line
9921066 if (!preg_match ('/^> (.*)$/ ' , $ line , $ matches ) && !preg_match ('/^>$/ ' , $ line )) {
@@ -1810,6 +1884,11 @@ protected function tryParseTable(Node $parent, array $lines, int $start): ?int
18101884 {
18111885 $ line = $ lines [$ start ];
18121886
1887+ // Fast early exit: tables start with |
1888+ if (!isset ($ line [0 ]) || $ line [0 ] !== '| ' ) {
1889+ return null ;
1890+ }
1891+
18131892 // Table rows start and end with | (but the ending | must be outside code spans)
18141893 if (!preg_match ('/^\|.*\|$/ ' , $ line )) {
18151894 return null ;
0 commit comments