Skip to content

Commit bfabf0b

Browse files
committed
feat: add markdown support
1 parent c5d1f3f commit bfabf0b

File tree

15 files changed

+137723
-0
lines changed

15 files changed

+137723
-0
lines changed

_automation/grammars.json

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -322,5 +322,16 @@
322322
"reference": "v0.5.0",
323323
"revision": "6129a83eeec7d6070b1c0567ec7ce3509ead607c",
324324
"updateBasedOn": "tag"
325+
},
326+
{
327+
"language": "markdown",
328+
"url": "https://github.com/tree-sitter-grammars/tree-sitter-markdown",
329+
"files": [
330+
"parser.c",
331+
"scanner.c"
332+
],
333+
"reference": "v0.2.3",
334+
"revision": "62516e8c78380e3b51d5b55727995d2c511436d8",
335+
"updateBasedOn": "tag"
325336
}
326337
]

_automation/main.go

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,8 @@ func (s *UpdateService) downloadGrammar(ctx context.Context, g *Grammar) {
239239
s.downloadYaml(ctx, g)
240240
case "php":
241241
s.downloadPhp(ctx, g)
242+
case "markdown":
243+
s.downloadMarkdown(ctx, g)
242244
default:
243245
s.defaultGrammarDownload(ctx, g)
244246
}
@@ -434,6 +436,34 @@ func (s *UpdateService) downloadTypescript(ctx context.Context, g *Grammar) {
434436
}
435437
}
436438

439+
// markdown is special as it contains 2 different grammars
440+
func (s *UpdateService) downloadMarkdown(ctx context.Context, g *Grammar) {
441+
url := g.ContentURL()
442+
443+
langs := []string{"tree-sitter-markdown", "tree-sitter-markdown-inline"}
444+
for _, lang := range langs {
445+
s.makeDir(ctx, fmt.Sprintf("%s/%s", g.Language, lang))
446+
447+
s.downloadFile(
448+
ctx,
449+
fmt.Sprintf("%s/%s/%s/src/tree_sitter/parser.h", url, g.Revision, lang),
450+
fmt.Sprintf("%s/%s/parser.h", g.Language, lang),
451+
nil,
452+
)
453+
454+
for _, f := range g.Files {
455+
s.downloadFile(
456+
ctx,
457+
fmt.Sprintf("%s/%s/%s/src/%s", url, g.Revision, lang, f),
458+
fmt.Sprintf("%s/%s/%s", g.Language, lang, f),
459+
map[string]string{
460+
`"tree_sitter/parser.h"`: `"parser.h"`,
461+
},
462+
)
463+
}
464+
}
465+
}
466+
437467
// for yaml grammar scanner.cc includes schema.generated.cc file
438468
// it causes cgo to compile schema.generated.cc twice and throw duplicate symbols error
439469
func (s *UpdateService) downloadYaml(ctx context.Context, g *Grammar) {

bindings.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -405,6 +405,10 @@ func (t SymbolType) String() string {
405405
return symbolTypeNames[t]
406406
}
407407

408+
func (n Node) ID() uintptr {
409+
return uintptr(n.c.id)
410+
}
411+
408412
// StartByte returns the node's start byte.
409413
func (n Node) StartByte() uint32 {
410414
return uint32(C.ts_node_start_byte(n.c))
@@ -433,6 +437,15 @@ func (n Node) EndPoint() Point {
433437
}
434438
}
435439

440+
func (n Node) Range() Range {
441+
return Range{
442+
StartByte: n.StartByte(),
443+
EndByte: n.EndByte(),
444+
StartPoint: n.StartPoint(),
445+
EndPoint: n.EndPoint(),
446+
}
447+
}
448+
436449
// Symbol returns the node's type as a Symbol.
437450
func (n Node) Symbol() Symbol {
438451
return C.ts_node_symbol(n.c)

markdown/binding.go

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
package markdown
2+
3+
import (
4+
"context"
5+
6+
sitter "github.com/smacker/go-tree-sitter"
7+
tree_sitter_markdown "github.com/smacker/go-tree-sitter/markdown/tree-sitter-markdown"
8+
tree_sitter_markdown_inline "github.com/smacker/go-tree-sitter/markdown/tree-sitter-markdown-inline"
9+
)
10+
11+
type MarkdownTree struct {
12+
blockTree *sitter.Tree
13+
inlineTrees []*sitter.Tree
14+
inlineIndices map[uintptr]int
15+
}
16+
17+
func (t *MarkdownTree) Edit(edit sitter.EditInput) {
18+
t.blockTree.Edit(edit)
19+
for _, tree := range t.inlineTrees {
20+
tree.Edit(edit)
21+
}
22+
}
23+
24+
func (t *MarkdownTree) BlockTree() *sitter.Tree {
25+
return t.blockTree
26+
}
27+
28+
func (t *MarkdownTree) InlineTree(parent *sitter.Node) *sitter.Tree {
29+
if parent == nil {
30+
return nil
31+
}
32+
33+
index, ok := t.inlineIndices[parent.ID()]
34+
if ok {
35+
return t.inlineTrees[index]
36+
}
37+
38+
return nil
39+
}
40+
41+
func (t *MarkdownTree) InlineRootNode(parent *sitter.Node) *sitter.Node {
42+
tree := t.InlineTree(parent)
43+
if tree == nil {
44+
return nil
45+
}
46+
47+
return tree.RootNode()
48+
}
49+
50+
func (t *MarkdownTree) InlineTrees() []*sitter.Tree {
51+
return t.inlineTrees
52+
}
53+
54+
func (t *MarkdownTree) Iter(f func(node *Node) bool) {
55+
root := t.blockTree.RootNode()
56+
t.iter(&Node{root, t.InlineRootNode(root)}, f)
57+
}
58+
59+
func (t *MarkdownTree) iter(node *Node, f func(node *Node) bool) (goNext bool) {
60+
goNext = f(node)
61+
if !goNext {
62+
return goNext
63+
}
64+
65+
childCount := node.NamedChildCount()
66+
for i := 0; i < int(childCount); i++ {
67+
child := node.NamedChild(i)
68+
69+
goNext = t.iter(&Node{Node: child, Inline: t.InlineRootNode(child)}, f)
70+
if !goNext {
71+
return goNext
72+
}
73+
}
74+
75+
return true
76+
}
77+
78+
type Node struct {
79+
*sitter.Node
80+
Inline *sitter.Node
81+
}
82+
83+
func ParseCtx(ctx context.Context, oldTree *MarkdownTree, content []byte) (*MarkdownTree, error) {
84+
p := sitter.NewParser()
85+
p.SetLanguage(tree_sitter_markdown.GetLanguage())
86+
87+
var old *sitter.Tree
88+
if oldTree != nil {
89+
old = oldTree.blockTree
90+
}
91+
tree, err := p.ParseCtx(ctx, old, content)
92+
if err != nil {
93+
return nil, err
94+
}
95+
96+
res := &MarkdownTree{
97+
blockTree: tree,
98+
inlineTrees: []*sitter.Tree{},
99+
inlineIndices: map[uintptr]int{},
100+
}
101+
102+
p.SetLanguage(tree_sitter_markdown_inline.GetLanguage())
103+
104+
q, err := sitter.NewQuery([]byte(`(inline) @inline`), tree_sitter_markdown.GetLanguage())
105+
if err != nil {
106+
return nil, err
107+
}
108+
109+
qc := sitter.NewQueryCursor()
110+
qc.Exec(q, tree.RootNode())
111+
112+
idx := int(0)
113+
for {
114+
match, ok := qc.NextMatch()
115+
if !ok {
116+
break
117+
}
118+
119+
for _, capture := range match.Captures {
120+
r := capture.Node.Range()
121+
ranges := []sitter.Range{}
122+
for i := 0; i < int(capture.Node.NamedChildCount()); i++ {
123+
child := capture.Node.NamedChild(i)
124+
childRange := child.Range()
125+
ranges = append(ranges, sitter.Range{
126+
StartPoint: r.StartPoint,
127+
StartByte: r.StartByte,
128+
EndPoint: childRange.EndPoint,
129+
EndByte: childRange.EndByte,
130+
})
131+
132+
r.StartPoint = childRange.EndPoint
133+
r.StartByte = childRange.EndByte
134+
}
135+
136+
ranges = append(ranges, r)
137+
p.SetIncludedRanges(ranges)
138+
var old *sitter.Tree
139+
if oldTree != nil && idx < len(oldTree.inlineTrees) {
140+
old = oldTree.inlineTrees[idx]
141+
}
142+
143+
inlineTree, err := p.ParseCtx(ctx, old, content)
144+
if err != nil {
145+
return nil, err
146+
}
147+
148+
res.inlineTrees = append(res.inlineTrees, inlineTree)
149+
res.inlineIndices[capture.Node.ID()] = idx
150+
idx++
151+
}
152+
}
153+
qc.Close()
154+
155+
return res, nil
156+
}

markdown/binding_test.go

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
package markdown_test
2+
3+
import (
4+
"context"
5+
"testing"
6+
7+
"github.com/smacker/go-tree-sitter/markdown"
8+
"github.com/stretchr/testify/assert"
9+
)
10+
11+
func TestMarkdown(t *testing.T) {
12+
assert := assert.New(t)
13+
14+
content := "# Hello\n- This is a image: ![image](https://example.com/image.jpg \"a image\")"
15+
tree, err := markdown.ParseCtx(context.Background(), nil, []byte(content))
16+
assert.NoError(err)
17+
18+
assert.Equal(
19+
"(document (section (atx_heading (atx_h1_marker) heading_content: (inline)) (list (list_item (list_marker_minus) (paragraph (inline))))))",
20+
tree.BlockTree().RootNode().String(),
21+
)
22+
23+
assert.Equal(
24+
"(inline)",
25+
tree.InlineTrees()[0].RootNode().String(),
26+
)
27+
28+
assert.Equal(
29+
"(inline (image (image_description) (link_destination) (link_title)))",
30+
tree.InlineTrees()[1].RootNode().String(),
31+
)
32+
}
33+
34+
func TestIter(t *testing.T) {
35+
assert := assert.New(t)
36+
37+
content := "# Hello\n- This two image: ![image](https://example.com/image.jpg \"a image\"), ![apple](https://example.com/apple.jpg \"a apple\")"
38+
tree, err := markdown.ParseCtx(context.Background(), nil, []byte(content))
39+
assert.NoError(err)
40+
41+
type BlockWithInline struct {
42+
Node string
43+
InlineNode string
44+
}
45+
46+
expected := []BlockWithInline{
47+
{"(document (section (atx_heading (atx_h1_marker) heading_content: (inline)) (list (list_item (list_marker_minus) (paragraph (inline))))))", ""},
48+
{"(section (atx_heading (atx_h1_marker) heading_content: (inline)) (list (list_item (list_marker_minus) (paragraph (inline)))))", ""},
49+
{"(atx_heading (atx_h1_marker) heading_content: (inline))", ""},
50+
{"(atx_h1_marker)", ""},
51+
{"(inline)", "(inline)"},
52+
{"(list (list_item (list_marker_minus) (paragraph (inline))))", ""},
53+
{"(list_item (list_marker_minus) (paragraph (inline)))", ""},
54+
{"(list_marker_minus)", ""},
55+
{"(paragraph (inline))", ""},
56+
{"(inline)", "(inline (image (image_description) (link_destination) (link_title)) (image (image_description) (link_destination) (link_title)))"},
57+
}
58+
59+
i := int(0)
60+
tree.Iter(func(node *markdown.Node) bool {
61+
assert.Equal(expected[i].Node, node.String(), "node mismatch. idx: %d", i)
62+
if expected[i].InlineNode != "" || node.Inline != nil {
63+
assert.Equal(expected[i].InlineNode, node.Inline.String(), "inline node mismatch. idx: %d", i)
64+
}
65+
66+
i++
67+
return true
68+
})
69+
}
70+
71+
func TestIterStop(t *testing.T) {
72+
assert := assert.New(t)
73+
74+
content := "# Hello\n- This two image: ![image](https://example.com/image.jpg \"a image\"), ![apple](https://example.com/apple.jpg \"a apple\")"
75+
tree, err := markdown.ParseCtx(context.Background(), nil, []byte(content))
76+
assert.NoError(err)
77+
78+
type BlockWithInline struct {
79+
Node string
80+
InlineNode string
81+
}
82+
83+
expected := []string{
84+
"(document (section (atx_heading (atx_h1_marker) heading_content: (inline)) (list (list_item (list_marker_minus) (paragraph (inline))))))",
85+
"(section (atx_heading (atx_h1_marker) heading_content: (inline)) (list (list_item (list_marker_minus) (paragraph (inline)))))",
86+
"(atx_heading (atx_h1_marker) heading_content: (inline))",
87+
}
88+
89+
collected := []string{}
90+
tree.Iter(func(node *markdown.Node) bool {
91+
collected = append(collected, node.String())
92+
93+
if node.Type() == "document" || node.Type() == "section" {
94+
return true
95+
}
96+
97+
return false
98+
})
99+
100+
assert.Equal(expected, collected)
101+
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
package tree_sitter_markdown_inline
2+
3+
//#include "parser.h"
4+
//TSLanguage *tree_sitter_markdown_inline();
5+
import "C"
6+
import (
7+
"unsafe"
8+
9+
sitter "github.com/smacker/go-tree-sitter"
10+
)
11+
12+
func GetLanguage() *sitter.Language {
13+
ptr := unsafe.Pointer(C.tree_sitter_markdown_inline())
14+
return sitter.NewLanguage(ptr)
15+
}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
package tree_sitter_markdown_inline_test
2+
3+
import (
4+
"context"
5+
"testing"
6+
7+
sitter "github.com/smacker/go-tree-sitter"
8+
tree_sitter_markdown_inline "github.com/smacker/go-tree-sitter/markdown/tree-sitter-markdown-inline"
9+
"github.com/stretchr/testify/assert"
10+
)
11+
12+
func TestGrammar(t *testing.T) {
13+
assert := assert.New(t)
14+
15+
n, err := sitter.ParseCtx(context.Background(), []byte("# Hello world!\n- Here is a picture: ![picture](https://example.com/picture.png)"), tree_sitter_markdown_inline.GetLanguage())
16+
assert.NoError(err)
17+
assert.Equal(
18+
"(inline (image (image_description) (link_destination)))",
19+
n.String(),
20+
)
21+
}

0 commit comments

Comments
 (0)