Skip to content

Commit 6200d4a

Browse files
committed
More tests.
1 parent 4cbb5f8 commit 6200d4a

File tree

6 files changed

+460
-86
lines changed

6 files changed

+460
-86
lines changed

src/Parser/BlockParser.php

Lines changed: 90 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -374,26 +374,51 @@ protected function extractFootnotes(array $lines): void
374374
$label = $matches[1];
375375
$content = $matches[2];
376376

377-
// Collect continuation lines (indented)
378-
$contentLines = [$content];
377+
// Determine base indentation (2 spaces for footnotes)
378+
$baseIndent = 2;
379+
380+
// Collect continuation lines (indented or blank)
381+
$contentLines = [];
382+
if (trim($content) !== '') {
383+
$contentLines[] = $content;
384+
}
379385
$j = $i + 1;
386+
$hasContent = false;
380387
while ($j < $count) {
381388
$nextLine = $lines[$j];
382389
if ($this->isBlankLine($nextLine)) {
390+
// Add blank line to preserve structure
391+
$contentLines[] = '';
383392
$j++;
384393

385394
continue;
386395
}
387-
if (preg_match('/^\s+(.+)$/', $nextLine, $contMatch)) {
396+
// Check if line has at least base indentation
397+
if (preg_match('/^[ ]{' . $baseIndent . '}(.*)$/', $nextLine, $contMatch)) {
388398
$contentLines[] = $contMatch[1];
399+
$hasContent = true;
400+
$j++;
401+
} elseif (!$hasContent && preg_match('/^\s+(.+)$/', $nextLine, $contMatch)) {
402+
// Allow flexible indentation for first content line
403+
$contentLines[] = $contMatch[1];
404+
$hasContent = true;
389405
$j++;
390406
} else {
391407
break;
392408
}
393409
}
394410

411+
// Remove trailing blank lines
412+
$lineCount = count($contentLines);
413+
while ($lineCount > 0 && $contentLines[$lineCount - 1] === '') {
414+
array_pop($contentLines);
415+
$lineCount--;
416+
}
417+
395418
$footnote = new Footnote($label);
396-
$this->parseBlocks($footnote, $contentLines, 0);
419+
if ($contentLines) {
420+
$this->parseBlocks($footnote, $contentLines, 0);
421+
}
397422
$this->footnotes[$label] = $footnote;
398423
}
399424

@@ -632,6 +657,12 @@ protected function tryParseCodeBlock(Node $parent, array $lines, int $start): ?i
632657
{
633658
$line = $lines[$start];
634659

660+
// Fast early exit: code blocks start with ` or ~ (possibly after whitespace)
661+
$trimmed = ltrim($line);
662+
if ($trimmed === '' || ($trimmed[0] !== '`' && $trimmed[0] !== '~')) {
663+
return null;
664+
}
665+
635666
// Match opening fence: 3+ backticks or tildes, optionally with leading whitespace
636667
if (!preg_match('/^(\s*)(`{3,}|~{3,})(.*)$/', $line, $matches)) {
637668
return null;
@@ -710,6 +741,11 @@ protected function tryParseComment(Node $parent, array $lines, int $start): ?int
710741
{
711742
$line = $lines[$start];
712743

744+
// Fast early exit: comments must contain {%
745+
if (!str_contains($line, '{%')) {
746+
return null;
747+
}
748+
713749
// Match comment opening: {%
714750
if (!str_starts_with(trim($line), '{%')) {
715751
return null;
@@ -779,6 +815,11 @@ protected function tryParseRawBlock(Node $parent, array $lines, int $start): ?in
779815
{
780816
$line = $lines[$start];
781817

818+
// Fast early exit: raw blocks start with ` and contain =
819+
if (!isset($line[0]) || $line[0] !== '`' || !str_contains($line, '=')) {
820+
return null;
821+
}
822+
782823
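// Match opening fence with =format, e.g. ``` =html (NOTE(review): the pattern below requires whitespace before "=", so ```=html does not match - confirm whether that form should be accepted)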
783824
if (!preg_match('/^(`{3,})\s+=(\w+)\s*$/', $line, $matches)) {
784825
return null;
@@ -828,6 +869,11 @@ protected function tryParseDiv(Node $parent, array $lines, int $start): ?int
828869
{
829870
$line = $lines[$start];
830871

872+
// Fast early exit: divs start with :
873+
if (!isset($line[0]) || $line[0] !== ':') {
874+
return null;
875+
}
876+
831877
// Match opening fence: 3+ colons with optional class
832878
if (!preg_match('/^(:{3,})\s*(.*)$/', $line, $matches)) {
833879
return null;
@@ -918,22 +964,45 @@ protected function tryParseHeading(Node $parent, array $lines, int $start): ?int
918964
{
919965
$line = $lines[$start];
920966

921-
// Match heading: 1-6 # characters followed by space
922-
if (!preg_match('/^(#{1,6})\s+(.+)$/', $line, $matches)) {
967+
// Fast early exit: headings start with # after optional leading spaces (this pre-check strips any number of spaces; the 3-space limit is enforced by the regex below)
968+
$trimmed = ltrim($line, ' ');
969+
if (!isset($trimmed[0]) || $trimmed[0] !== '#') {
970+
return null;
971+
}
972+
973+
// Match heading: optional leading spaces, 1-6 # characters, optionally followed by space and content
974+
// Can be: "## Heading", "##", " ## Heading", "##\n", etc.
975+
if (!preg_match('/^[ ]{0,3}(#{1,6})(?:\s+(.*))?$/', $line, $matches)) {
923976
return null;
924977
}
925978

926979
$level = strlen($matches[1]);
927-
$content = $matches[2];
980+
$content = isset($matches[2]) ? trim($matches[2]) : '';
928981

929-
// Collect continuation lines (lines starting with same # or plain text)
982+
// Collect continuation lines
930983
$i = $start + 1;
931984
$count = count($lines);
932985
while ($i < $count) {
933986
$nextLine = $lines[$i];
934-
// Check for continuation with # prefix
935-
if (preg_match('/^#{1,' . $level . '}\s+(.+)$/', $nextLine, $contMatch)) {
936-
$content .= ' ' . $contMatch[1];
987+
988+
// Empty line ends the heading
989+
if ($this->isBlankLine($nextLine)) {
990+
break;
991+
}
992+
993+
// Check for continuation with # prefix (same level or less)
994+
if (preg_match('/^[ ]{0,3}#{1,' . $level . '}\s+(.+)$/', $nextLine, $contMatch)) {
995+
if ($content !== '') {
996+
$content .= "\n";
997+
}
998+
$content .= $contMatch[1];
999+
$i++;
1000+
} elseif (!$this->startsNewBlock($nextLine)) {
1001+
// "Lazy" continuation - plain text continues the heading
1002+
if ($content !== '') {
1003+
$content .= "\n";
1004+
}
1005+
$content .= $nextLine;
9371006
$i++;
9381007
} else {
9391008
break;
@@ -987,6 +1056,11 @@ protected function tryParseBlockQuote(Node $parent, array $lines, int $start): ?
9871056
{
9881057
$line = $lines[$start];
9891058

1059+
// Fast early exit: block quotes start with >
1060+
if (!isset($line[0]) || $line[0] !== '>') {
1061+
return null;
1062+
}
1063+
9901064
// Match block quote: > followed by space or end of line (NOT >text or >>)
9911065
// The > must be followed by a space or be at end of line
9921066
if (!preg_match('/^> (.*)$/', $line, $matches) && !preg_match('/^>$/', $line)) {
@@ -1810,6 +1884,11 @@ protected function tryParseTable(Node $parent, array $lines, int $start): ?int
18101884
{
18111885
$line = $lines[$start];
18121886

1887+
// Fast early exit: tables start with |
1888+
if (!isset($line[0]) || $line[0] !== '|') {
1889+
return null;
1890+
}
1891+
18131892
// Table rows start and end with | (but the ending | must be outside code spans)
18141893
if (!preg_match('/^\|.*\|$/', $line)) {
18151894
return null;

0 commit comments

Comments
 (0)