Skip to content

Commit 32c9290

Browse files
authored
Merge pull request #381 from commonmark/tables-without-blank-line-before
Parse tables without blank line before
2 parents 8b6ecb6 + bb57a5f commit 32c9290

File tree

12 files changed

+286
-71
lines changed

12 files changed

+286
-71
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ with the exception that 0.x versions can break between minor versions.
1010
### Added
1111
- More documentation with examples for `Node` classes
1212
### Changed
13+
- GitHub tables: Tables are now parsed even if there's no blank line before the
14+
table heading, matching GitHub's behavior.
1315
### Fixed
1416
- `MarkdownRenderer`: Fix precedence for `nodeRendererFactory`: Factories passed
1517
to the builder can now override rendering for core node types.

commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableBlockParser.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -274,17 +274,17 @@ public static class Factory extends AbstractBlockParserFactory {
274274
@Override
275275
public BlockStart tryStart(ParserState state, MatchedBlockParser matchedBlockParser) {
276276
List<SourceLine> paragraphLines = matchedBlockParser.getParagraphLines().getLines();
277-
if (paragraphLines.size() == 1 && Characters.find('|', paragraphLines.get(0).getContent(), 0) != -1) {
277+
if (paragraphLines.size() >= 1 && Characters.find('|', paragraphLines.get(paragraphLines.size() - 1).getContent(), 0) != -1) {
278278
SourceLine line = state.getLine();
279279
SourceLine separatorLine = line.substring(state.getIndex(), line.getContent().length());
280280
List<TableCellInfo> columns = parseSeparator(separatorLine.getContent());
281281
if (columns != null && !columns.isEmpty()) {
282-
SourceLine paragraph = paragraphLines.get(0);
282+
SourceLine paragraph = paragraphLines.get(paragraphLines.size() - 1);
283283
List<SourceLine> headerCells = split(paragraph);
284284
if (columns.size() >= headerCells.size()) {
285285
return BlockStart.of(new TableBlockParser(columns, paragraph))
286286
.atIndex(state.getIndex())
287-
.replaceActiveBlockParser();
287+
.replaceParagraphLines(1);
288288
}
289289
}
290290
}

commonmark-ext-gfm-tables/src/test/java/org/commonmark/ext/gfm/tables/TablesTest.java

Lines changed: 51 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
package org.commonmark.ext.gfm.tables;
22

33
import org.commonmark.Extension;
4-
import org.commonmark.node.Node;
5-
import org.commonmark.node.SourceSpan;
4+
import org.commonmark.node.*;
65
import org.commonmark.parser.IncludeSourceSpans;
76
import org.commonmark.parser.Parser;
87
import org.commonmark.renderer.html.AttributeProvider;
@@ -79,11 +78,6 @@ public void separatorNeedsPipes() {
7978
assertRendering("Abc|Def\n|--- ---", "<p>Abc|Def\n|--- ---</p>\n");
8079
}
8180

82-
@Test
83-
public void headerMustBeOneLine() {
84-
assertRendering("No\nAbc|Def\n---|---", "<p>No\nAbc|Def\n---|---</p>\n");
85-
}
86-
8781
@Test
8882
public void oneHeadNoBody() {
8983
assertRendering("Abc|Def\n---|---", "<table>\n" +
@@ -702,6 +696,26 @@ public void danglingPipe() {
702696
"<p>|</p>\n");
703697
}
704698

699+
@Test
700+
public void interruptsParagraph() {
701+
assertRendering("text\n" +
702+
"|a |\n" +
703+
"|---|\n" +
704+
"|b |", "<p>text</p>\n" +
705+
"<table>\n" +
706+
"<thead>\n" +
707+
"<tr>\n" +
708+
"<th>a</th>\n" +
709+
"</tr>\n" +
710+
"</thead>\n" +
711+
"<tbody>\n" +
712+
"<tr>\n" +
713+
"<td>b</td>\n" +
714+
"</tr>\n" +
715+
"</tbody>\n" +
716+
"</table>\n");
717+
}
718+
705719
@Test
706720
public void attributeProviderIsApplied() {
707721
AttributeProviderFactory factory = new AttributeProviderFactory() {
@@ -835,6 +849,36 @@ public void sourceSpans() {
835849
assertThat(bodyRow3Cell2.getSourceSpans()).isEqualTo(List.of());
836850
}
837851

852+
@Test
853+
public void sourceSpansWhenInterrupting() {
854+
var parser = Parser.builder()
855+
.extensions(EXTENSIONS)
856+
.includeSourceSpans(IncludeSourceSpans.BLOCKS_AND_INLINES)
857+
.build();
858+
var document = parser.parse("a\n" +
859+
"bc\n" +
860+
"|de|\n" +
861+
"|---|\n" +
862+
"|fg|");
863+
864+
var paragraph = (Paragraph) document.getFirstChild();
865+
var text = (Text) paragraph.getFirstChild();
866+
assertThat(text.getLiteral()).isEqualTo("a");
867+
assertThat(text.getNext()).isInstanceOf(SoftLineBreak.class);
868+
var text2 = (Text) text.getNext().getNext();
869+
assertThat(text2.getLiteral()).isEqualTo("bc");
870+
871+
assertThat(paragraph.getSourceSpans()).isEqualTo(List.of(
872+
SourceSpan.of(0, 0, 0, 1),
873+
SourceSpan.of(1, 0, 2, 2)));
874+
875+
var table = (TableBlock) document.getLastChild();
876+
assertThat(table.getSourceSpans()).isEqualTo(List.of(
877+
SourceSpan.of(2, 0, 5, 4),
878+
SourceSpan.of(3, 0, 10, 5),
879+
SourceSpan.of(4, 0, 16, 4)));
880+
}
881+
838882
@Override
839883
protected String render(String source) {
840884
return RENDERER.render(PARSER.parse(source));

commonmark/src/main/java/org/commonmark/internal/BlockStartImpl.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ public class BlockStartImpl extends BlockStart {
99
private int newIndex = -1;
1010
private int newColumn = -1;
1111
private boolean replaceActiveBlockParser = false;
12+
private int replaceParagraphLines = 0;
1213

1314
public BlockStartImpl(BlockParser... blockParsers) {
1415
this.blockParsers = blockParsers;
@@ -30,6 +31,10 @@ public boolean isReplaceActiveBlockParser() {
3031
return replaceActiveBlockParser;
3132
}
3233

34+
int getReplaceParagraphLines() {
35+
return replaceParagraphLines;
36+
}
37+
3338
@Override
3439
public BlockStart atIndex(int newIndex) {
3540
this.newIndex = newIndex;
@@ -48,4 +53,12 @@ public BlockStart replaceActiveBlockParser() {
4853
return this;
4954
}
5055

56+
@Override
57+
public BlockStart replaceParagraphLines(int lines) {
58+
if (!(lines >= 1)) {
59+
throw new IllegalArgumentException("Lines must be >= 1");
60+
}
61+
this.replaceParagraphLines = lines;
62+
return this;
63+
}
5164
}

commonmark/src/main/java/org/commonmark/internal/DocumentParser.java

Lines changed: 23 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -270,9 +270,15 @@ private void parseLine(String ln, int inputIndex) {
270270
}
271271

272272
List<SourceSpan> replacedSourceSpans = null;
273-
if (blockStart.isReplaceActiveBlockParser()) {
274-
Block replacedBlock = prepareActiveBlockParserForReplacement();
275-
replacedSourceSpans = replacedBlock.getSourceSpans();
273+
if (blockStart.getReplaceParagraphLines() >= 1 || blockStart.isReplaceActiveBlockParser()) {
274+
var activeBlockParser = getActiveBlockParser();
275+
if (activeBlockParser instanceof ParagraphParser) {
276+
var paragraphParser = (ParagraphParser) activeBlockParser;
277+
var lines = blockStart.isReplaceActiveBlockParser() ? Integer.MAX_VALUE : blockStart.getReplaceParagraphLines();
278+
replacedSourceSpans = replaceParagraphLines(lines, paragraphParser);
279+
} else if (blockStart.isReplaceActiveBlockParser()) {
280+
replacedSourceSpans = prepareActiveBlockParserForReplacement(activeBlockParser);
281+
}
276282
}
277283

278284
for (BlockParser newBlockParser : blockStart.getBlockParsers()) {
@@ -498,24 +504,23 @@ private OpenBlockParser deactivateBlockParser() {
498504
return openBlockParsers.remove(openBlockParsers.size() - 1);
499505
}
500506

501-
private Block prepareActiveBlockParserForReplacement() {
502-
// Note that we don't want to parse inlines, as it's getting replaced.
503-
BlockParser old = deactivateBlockParser().blockParser;
507+
private List<SourceSpan> replaceParagraphLines(int lines, ParagraphParser paragraphParser) {
508+
// Remove lines from paragraph as the new block is using them.
509+
// If all lines are used, this also unlinks the Paragraph block.
510+
var sourceSpans = paragraphParser.removeLines(lines);
511+
// Close the paragraph block parser, which will finalize it.
512+
closeBlockParsers(1);
513+
return sourceSpans;
514+
}
504515

505-
if (old instanceof ParagraphParser) {
506-
ParagraphParser paragraphParser = (ParagraphParser) old;
507-
// Collect any link reference definitions. Note that replacing the active block parser is done after a
508-
// block parser got the current paragraph content using MatchedBlockParser#getContentString. In case the
509-
// paragraph started with link reference definitions, we parse and strip them before the block parser gets
510-
// the content. We want to keep them.
511-
// If no replacement happens, we collect the definitions as part of finalizing blocks.
512-
addDefinitionsFrom(paragraphParser);
513-
}
516+
private List<SourceSpan> prepareActiveBlockParserForReplacement(BlockParser blockParser) {
517+
// Note that we don't want to parse inlines here, as it's getting replaced.
518+
deactivateBlockParser();
514519

515520
// Do this so that source positions are calculated, which we will carry over to the replacing block.
516-
old.closeBlock();
517-
old.getBlock().unlink();
518-
return old.getBlock();
521+
blockParser.closeBlock();
522+
blockParser.getBlock().unlink();
523+
return blockParser.getBlock().getSourceSpans();
519524
}
520525

521526
private Document finalizeAndProcess() {

commonmark/src/main/java/org/commonmark/internal/HeadingParser.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ public BlockStart tryStart(ParserState state, MatchedBlockParser matchedBlockPar
6060
if (!paragraph.isEmpty()) {
6161
return BlockStart.of(new HeadingParser(setextHeadingLevel, paragraph))
6262
.atIndex(line.getContent().length())
63-
.replaceActiveBlockParser();
63+
.replaceParagraphLines(paragraph.getLines().size());
6464
}
6565
}
6666

commonmark/src/main/java/org/commonmark/internal/LinkReferenceDefinitionParser.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import org.commonmark.parser.beta.Scanner;
1111

1212
import java.util.ArrayList;
13+
import java.util.Collections;
1314
import java.util.List;
1415

1516
/**
@@ -102,6 +103,14 @@ State getState() {
102103
return state;
103104
}
104105

106+
List<SourceSpan> removeLines(int lines) {
107+
var removedSpans = Collections.unmodifiableList(new ArrayList<>(
108+
sourceSpans.subList(Math.max(sourceSpans.size() - lines, 0), sourceSpans.size())));
109+
removeLast(lines, paragraphLines);
110+
removeLast(lines, sourceSpans);
111+
return removedSpans;
112+
}
113+
105114
private boolean startDefinition(Scanner scanner) {
106115
// Finish any outstanding references now. We don't do this earlier because we need addSourceSpan to have been
107116
// called before we do it.
@@ -269,6 +278,16 @@ private void finishReference() {
269278
title = null;
270279
}
271280

281+
private static <T> void removeLast(int n, List<T> list) {
282+
if (n >= list.size()) {
283+
list.clear();
284+
} else {
285+
for (int i = 0; i < n; i++) {
286+
list.remove(list.size() - 1);
287+
}
288+
}
289+
}
290+
272291
enum State {
273292
// Looking for the start of a definition, i.e. `[`
274293
START_DEFINITION,

commonmark/src/main/java/org/commonmark/internal/ParagraphParser.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,4 +79,8 @@ public void parseInlines(InlineParser inlineParser) {
7979
public SourceLines getParagraphLines() {
8080
return linkReferenceDefinitionParser.getParagraphLines();
8181
}
82+
83+
public List<SourceSpan> removeLines(int lines) {
84+
return linkReferenceDefinitionParser.removeLines(lines);
85+
}
8286
}

commonmark/src/main/java/org/commonmark/parser/block/BlockStart.java

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,18 +10,59 @@ public abstract class BlockStart {
1010
protected BlockStart() {
1111
}
1212

13+
/**
14+
* Result for when there is no block start.
15+
*/
1316
public static BlockStart none() {
1417
return null;
1518
}
1619

20+
/**
21+
* Start block(s) with the specified parser(s).
22+
*/
1723
public static BlockStart of(BlockParser... blockParsers) {
1824
return new BlockStartImpl(blockParsers);
1925
}
2026

27+
/**
28+
* Continue parsing at the specified index.
29+
*
30+
* @param newIndex the new index, see {@link ParserState#getIndex()}
31+
*/
2132
public abstract BlockStart atIndex(int newIndex);
2233

34+
/**
35+
* Continue parsing at the specified column (for tab handling).
36+
*
37+
* @param newColumn the new column, see {@link ParserState#getColumn()}
38+
*/
2339
public abstract BlockStart atColumn(int newColumn);
2440

41+
/**
42+
* @deprecated use {@link #replaceParagraphLines(int)} instead; please raise an issue if that doesn't work for you
43+
* for some reason.
44+
*/
45+
@Deprecated
2546
public abstract BlockStart replaceActiveBlockParser();
2647

48+
/**
49+
* Replace a number of lines from the current paragraph (as returned by
50+
* {@link MatchedBlockParser#getParagraphLines()}) with the new block.
51+
* <p>
52+
* This is useful for parsing blocks that start with normal paragraphs and only have special marker syntax in later
53+
* lines, e.g. in this:
54+
* <pre>
55+
* Foo
56+
* ===
57+
* </pre>
58+
* The <code>Foo</code> line is initially parsed as a normal paragraph, then <code>===</code> is parsed as a heading
59+
* marker, replacing the 1 paragraph line before. The end result is a single Heading block.
60+
* <p>
61+
* Note that source spans from the replaced lines are automatically added to the new block.
62+
*
63+
* @param lines the number of lines to replace (at least 1); use {@link Integer#MAX_VALUE} to replace the whole
64+
* paragraph
65+
*/
66+
public abstract BlockStart replaceParagraphLines(int lines);
67+
2768
}

commonmark/src/main/java/org/commonmark/parser/block/MatchedBlockParser.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@ public interface MatchedBlockParser {
1212
BlockParser getMatchedBlockParser();
1313

1414
/**
15-
* Returns the current paragraph lines if the matched block is a paragraph.
15+
* Returns the current paragraph lines if the matched block is a paragraph. If you want to use some or all of the
16+
* lines for starting a new block instead, use {@link BlockStart#replaceParagraphLines(int)}.
1617
*
1718
* @return paragraph content or an empty list
1819
*/

0 commit comments

Comments
 (0)