-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathtree-sitter-server.js
More file actions
122 lines (111 loc) · 4.91 KB
/
tree-sitter-server.js
File metadata and controls
122 lines (111 loc) · 4.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
const Parser = require('tree-sitter');
const Python = require('tree-sitter-python');
const Rust = require('tree-sitter-rust');
const Go = require('tree-sitter-go');
const Kotlin = require('tree-sitter-kotlin');
const JavaScript = require('tree-sitter-javascript');
const CSharp = require('tree-sitter-c-sharp');
const TypeScript = require('tree-sitter-typescript').typescript;
const CPlusPlus = require('tree-sitter-cpp');
const Java = require('tree-sitter-java');
const C = require('tree-sitter-c');
const Swift = require('tree-sitter-swift');
const Scala = require('tree-sitter-scala');
const Ruby = require('tree-sitter-ruby');
const Bash = require('tree-sitter-bash');
const express = require("express");
// Express application. The JSON body parser is installed for every route, so
// handlers can read the decoded payload from `request.body`.
const app = express();
app.use(express.json());

// Builds a route handler that always answers with the same fixed text.
const replyWith = (text) => (_request, response) => {
  response.send(text);
};

// Landing route: a human-readable confirmation that the process is up.
app.get('/', replyWith("tree-sitter server is running"));

// Health route: a short fixed body for availability checks.
app.get('/health', replyWith("OK"));
// Grammars selectable through the `language` field of a /tokenize request.
// A Map is used instead of an object literal so that inherited property names
// such as "constructor" or "toString" are never mistaken for a language.
const GRAMMARS_BY_LANGUAGE = new Map([
  ["python", Python],
  ["rust", Rust],
  ["go", Go],
  ["kotlin", Kotlin],
  ["javascript", JavaScript],
  ["csharp", CSharp],
  ["typescript", TypeScript],
  ["cpp", CPlusPlus],
  ["java", Java],
  ["c", C],
  ["swift", Swift],
  ["scala", Scala],
  ["ruby", Ruby],
  ["bash", Bash],
]);

// Node types that are emitted as ONE token carrying the node's full text
// instead of being descended into. The match is on the type name only, so it
// applies to every grammar that uses the same name. The grammar named next to
// each entry is the one that motivated it.
const WHOLE_NODE_TOKEN_TYPES = new Set([
  // tree-sitter-rust: the node text is the whole literal, but the only
  // children are the two delimiters.
  "string_literal",
  // tree-sitter-python: an escape such as "\\" is exposed as an
  // escape_sequence child while the rest of the content is not, so descending
  // would drop part of the string.
  "string_content",
  // tree-sitter-scala: same escape_sequence situation as above.
  "string",
  // tree-sitter-swift: same escape_sequence situation as above.
  "line_string_literal",
  // tree-sitter-go: the node text is the whole literal, but the only children
  // are the two delimiters.
  "interpreted_string_literal",
  // tree-sitter-kotlin: the node text is e.g. 'a', but the only children are
  // the two delimiters.
  "character_literal",
  // tree-sitter-typescript: in `let line: string` the `string` is a
  // predefined_type with a single child of type "string". Emit the outer node
  // so it is reported as a type token rather than as a string literal.
  "predefined_type",
]);

/**
 * Converts a syntax node into the token record returned to clients.
 *
 * Rows are shifted by +1 relative to the positions reported by the parser;
 * columns are passed through unchanged.
 *
 * @param {object} node - tree-sitter syntax node.
 * @returns {{startRow: number, startColumn: number, endRow: number,
 *            endColumn: number, type: string, text: string}}
 */
function toToken(node) {
  return {
    startRow: node.startPosition.row + 1,
    startColumn: node.startPosition.column,
    endRow: node.endPosition.row + 1,
    endColumn: node.endPosition.column,
    type: String(node.type),
    text: String(node.text),
  };
}

/**
 * Walks the syntax tree depth-first and appends one token per emitted node
 * to `tokens`, in document order.
 *
 * A node is emitted (and not descended into) when it has no children, when
 * its type is listed in WHOLE_NODE_TOKEN_TYPES, or when its type name contains
 * "comment". Every other node is only traversed.
 *
 * @param {object} parser - parser already configured with the request's
 *   grammar; reused to re-parse the body of preprocessor defines.
 * @param {object} node - node to start from.
 * @param {object[]} tokens - output array, mutated in place.
 */
function collectTokens(parser, node, tokens) {
  const nodeType = String(node.type);

  if (nodeType === "preproc_arg") {
    // tree-sitter-cpp/c does not parse the replacement text of a #define, so
    // parse it separately. The text is padded with as many newlines and spaces
    // as the node's start row/column so that the positions in the nested tree
    // line up with the positions in the original document.
    const padding = "\n".repeat(node.startPosition.row) + " ".repeat(node.startPosition.column);
    const defineTree = parser.parse(padding + node.text);
    collectTokens(parser, defineTree.rootNode, tokens);
    return;
  }

  // Comments: tree-sitter-rust and tree-sitter-scala expose only the "//"
  // marker as a child, so the outer node is emitted for any comment-like type.
  const isComment = nodeType.includes("comment");
  if (WHOLE_NODE_TOKEN_TYPES.has(nodeType) || isComment) {
    tokens.push(toToken(node));
    return;
  }

  const children = node.children;
  if (children.length === 0) {
    // Leaf node: this is an actual token.
    tokens.push(toToken(node));
    return;
  }
  for (const child of children) {
    collectTokens(parser, child, tokens);
  }
}

/**
 * POST /tokenize
 *
 * Request body (JSON): `{ language: string, code: string }` where `language`
 * is one of the keys of GRAMMARS_BY_LANGUAGE.
 * Response body: `{ tokens: [...] }` with one record per token (see toToken).
 *
 * Throws an Error for an unsupported language or a non-string `code`. The
 * error is not caught here, so it is left to Express's error handling.
 */
app.post('/tokenize', (request, response) => {
  // Tolerate a missing body so the caller gets the explicit errors below
  // rather than a property access on undefined.
  const { language: languageName, code } = request.body ?? {};

  const grammar = GRAMMARS_BY_LANGUAGE.get(languageName);
  if (grammar === undefined) {
    const supported = [...GRAMMARS_BY_LANGUAGE.keys()].join(", ");
    throw new Error(`Language not supported: ${languageName}. Supported languages: ${supported}`);
  }
  if (typeof code !== "string") {
    throw new Error("Request body field \"code\" must be a string");
  }

  const parser = new Parser();
  parser.setLanguage(grammar);
  const tree = parser.parse(code);

  const tokens = [];
  collectTokens(parser, tree.rootNode, tokens);
  response.send({ tokens });
});
// TCP port the HTTP server binds to.
const PORT = 8031;

// Report readiness from the listen callback, i.e. only once the server is
// actually accepting connections, instead of unconditionally right after the
// call. Some Express versions hand a bind failure (e.g. port already in use)
// to this callback; rethrow it so a failed start still terminates the process
// instead of being reported as success.
app.listen(PORT, (error) => {
  if (error) {
    throw error;
  }
  console.log(`Listening on port ${PORT}`);
});