Skip to content

Commit f6ed8a3

Browse files
Add function-specific README.md
1 parent 6fd4da1 commit f6ed8a3

File tree

4 files changed

+154
-53
lines changed

4 files changed

+154
-53
lines changed

functions/tree-sitter/README.md

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
# Usage
2+
3+
Use this tool when an agent needs to extract code ranges using tree-sitter queries (tree-sitter queries are already very common, and current foundation LLMs can already generate them).
4+
Internally, this tool uses nixpkgs to pull pre-compiled and verified parsers. `markdown`, `python`, `java`, `html`, `dockerfile`, and `bash` are all tested, but hundreds of different
5+
languages are supported.
6+
7+
For example, if we prompt an LLM to "extract top-level function definitions from a Python module", it will generate the following query.
8+
9+
```lisp
10+
(module (function_definition) @top-level)
11+
```
12+
13+
The agent interface is shown here. This is the interface that the agent will use to interact with the tool.
14+
As always, the tool itself is a docker container.
15+
16+
```yaml
17+
name: tree-sitter
18+
description: Extract code ranges using tree-sitter queries
19+
parameters:
20+
type: object
21+
properties:
22+
lang:
23+
type: string
24+
description: language to parse
25+
query:
26+
type: string
27+
description: tree-sitter query
28+
file:
29+
type: string
30+
description: the file to parse
31+
container:
32+
image: vonwig/tree-sitter:latest
33+
command:
34+
- "-lang"
35+
- "{{lang}}"
36+
- "-query"
37+
- "{{query}}"
38+
stdin:
39+
file: "{{file}}"
40+
```
41+
42+
The tool streams back a series of JSON objects, one per captured code range.
43+
44+
```json
45+
{
46+
"capture_name": "top-level",
47+
"node_text": "def hello():\\n\\tprint(\"hello\")",
48+
"start_byte": 0,
49+
"end_byte": 30,
50+
"start_point": {
51+
"row": 0,
52+
"column": 0
53+
},
54+
"end_point": {
55+
"row": 0,
56+
"column": 30
57+
}
58+
}
59+
```
60+
61+
## Aside on tool creation
62+
63+
This tool itself was generated by an LLM.
64+
65+
## Using the container
66+
67+
The tool can also be called directly using `docker run`.
68+
69+
```sh
70+
docker run --rm -i vonwig/tree-sitter -lang python -query "(module (function_definition) @top-level)" < <(echo "def hello():\n\tprint(\"hello\")")
71+
```
72+

functions/tree-sitter/cmd/ts/main.go

Lines changed: 64 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
package main
22

33
import (
4+
"flag"
45
"fmt"
56
"io/ioutil"
67
"os"
8+
"encoding/json"
79

810
sitter "github.com/smacker/go-tree-sitter"
911
"github.com/smacker/go-tree-sitter/python"
@@ -15,13 +17,22 @@ import (
1517
)
1618

1719
func main() {
18-
// Check if both language and query string are provided as arguments
19-
if len(os.Args) < 3 {
20-
fmt.Println("Usage: ./program <language> <query_string>")
20+
// Define flags
21+
languagePtr := flag.String("lang", "", "The programming language to parse (required)")
22+
queryPtr := flag.String("query", "", "The query string to execute (required)")
23+
24+
// Parse flags
25+
flag.Parse()
26+
27+
// Check if required flags are provided
28+
if *languagePtr == "" {
29+
fmt.Println("Usage: ./program -lang <language> -query <query_string>")
30+
flag.PrintDefaults()
2131
return
2232
}
23-
language := os.Args[1]
24-
queryString := os.Args[2]
33+
34+
// language is mandatory
35+
language := *languagePtr
2536

2637
// Create a parser
2738
parser := sitter.NewParser()
@@ -58,8 +69,12 @@ func main() {
5869
tree := parser.Parse(nil, sourceCode)
5970
defer tree.Close()
6071

61-
// Write the S-expression of the tree to stdout
62-
fmt.Println(tree.RootNode().String())
72+
queryString := *queryPtr
73+
if queryString == "" {
74+
// Write the S-expression of the tree to stdout
75+
fmt.Println(tree.RootNode().String())
76+
return
77+
}
6378

6479
// Create a query
6580
query, err := sitter.NewQuery([]byte(queryString), lang)
@@ -85,7 +100,48 @@ func main() {
85100
for _, capture := range match.Captures {
86101
captureName := query.CaptureNameForId(capture.Index)
87102
nodeText := capture.Node.Content(sourceCode)
88-
fmt.Printf("Capture: %s, Node: %s\n", captureName, nodeText)
103+
104+
captureInfo := struct {
105+
CaptureName string `json:"capture_name"`
106+
NodeText string `json:"node_text"`
107+
StartByte uint32 `json:"start_byte"`
108+
EndByte uint32 `json:"end_byte"`
109+
StartPoint struct {
110+
Row uint32 `json:"row"`
111+
Column uint32 `json:"column"`
112+
} `json:"start_point"`
113+
EndPoint struct {
114+
Row uint32 `json:"row"`
115+
Column uint32 `json:"column"`
116+
} `json:"end_point"`
117+
}{
118+
CaptureName: captureName,
119+
NodeText: nodeText,
120+
StartByte: capture.Node.StartByte(),
121+
EndByte: capture.Node.EndByte(),
122+
StartPoint: struct {
123+
Row uint32 `json:"row"`
124+
Column uint32 `json:"column"`
125+
}{
126+
Row: capture.Node.StartPoint().Row,
127+
Column: capture.Node.StartPoint().Column,
128+
},
129+
EndPoint: struct {
130+
Row uint32 `json:"row"`
131+
Column uint32 `json:"column"`
132+
}{
133+
Row: capture.Node.EndPoint().Row,
134+
Column: capture.Node.EndPoint().Column,
135+
},
136+
}
137+
138+
jsonData, err := json.MarshalIndent(captureInfo, "", " ")
139+
if err != nil {
140+
fmt.Println("Error marshaling JSON:", err)
141+
continue
142+
}
143+
144+
fmt.Println(string(jsonData))
89145
}
90146
}
91147
}

functions/tree-sitter/flake.nix

Lines changed: 0 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -25,47 +25,6 @@
2525

2626
in rec {
2727
packages = rec {
28-
# darwin versus linux
29-
dylibExt = if nixpkgs.lib.hasInfix "darwin" system then "dylib" else "so";
30-
31-
lib = pkgs.stdenv.mkDerivation {
32-
name = "lib";
33-
src = ./.;
34-
installPhase = ''
35-
mkdir -p $out/lib;
36-
cp ${pkgs.tree-sitter}/lib/libtree-sitter.${dylibExt} $out/lib/;
37-
cp ${pkgs.tree-sitter-grammars.tree-sitter-markdown}/parser $out/lib/libtree-sitter-markdown.${dylibExt};
38-
cp ${pkgs.tree-sitter-grammars.tree-sitter-python}/parser $out/lib/libtree-sitter-python.${dylibExt};
39-
'';
40-
};
41-
42-
# derive the parser
43-
parser = pkgs.stdenv.mkDerivation {
44-
name = "parser";
45-
src = ./.;
46-
nativeBuildInputs = [
47-
pkgs.gcc
48-
pkgs.findutils
49-
pkgs.patchelf
50-
];
51-
buildPhase = ''
52-
${pkgs.gcc}/bin/gcc -o parser \
53-
main.c \
54-
-I${pkgs.tree-sitter}/include \
55-
${pkgs.tree-sitter-grammars.tree-sitter-markdown}/parser \
56-
${pkgs.tree-sitter-grammars.tree-sitter-python}/parser \
57-
${pkgs.tree-sitter}/lib/libtree-sitter.${dylibExt}
58-
'';
59-
60-
installPhase = ''
61-
mkdir -p $out/bin;
62-
cp parser $out/bin/parser;
63-
'';
64-
65-
fixupPhase = ''
66-
find $out -type f -exec patchelf --shrink-rpath '{}' \; -exec strip '{}' \; 2>/dev/null
67-
'';
68-
};
6928

7029
goBinary = pkgs.buildGoModule {
7130
pname = "tree-sitter-query";
@@ -90,7 +49,6 @@
9049
subPackages = [ "cmd/ts" ];
9150
};
9251

93-
# the script must have gh in the PATH
9452
default = pkgs.writeShellScriptBin "entrypoint" ''
9553
export PATH=${pkgs.lib.makeBinPath [goBinary]}
9654
ts "$@"

functions/tree-sitter/runbook.md

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,26 @@ docker build -t vonwig/tree-sitter .
1010
```
1111

1212
```sh
13-
./result/bin/ts python "(module (function_definition) @top-level)" < test/resources/hello.py
14-
./result/bin/ts markdown "(document (section (atx_heading (atx_h1_marker))) @h1)" < test/resources/hello.md
13+
# docker:command=release-build
14+
15+
docker buildx build \
16+
--builder hydrobuild \
17+
--platform linux/amd64,linux/arm64 \
18+
--tag vonwig/tree-sitter:latest \
19+
--file Dockerfile \
20+
--push .
21+
```
22+
23+
```sh
24+
./result/bin/ts -lang python -query "(module (function_definition) @top-level)" < test/resources/hello.py
25+
./result/bin/ts -lang markdown -query "(document (section (atx_heading (atx_h1_marker))) @h1)" < test/resources/hello.md
26+
```
27+
28+
```sh
29+
./result/bin/ts -lang markdown < test/resources/hello.md
1530
```
1631

1732
```sh
18-
docker run --rm -i vonwig/tree-sitter python "(module (function_definition) @top-level)" < <(echo "def hello():\n\tprint(\"hello\")")
33+
docker run --rm -i vonwig/tree-sitter -lang python -query "(module (function_definition) @top-level)" < <(echo "def hello():\n\tprint(\"hello\")")
1934
```
2035

0 commit comments

Comments
 (0)