Skip to content

Commit ca92c3a

Browse files
author
Calvinn Ng
committed
add code node extraction from tree-sitter
1 parent 515db2d commit ca92c3a

File tree

2 files changed

+90
-18
lines changed

2 files changed

+90
-18
lines changed

core/indexing/chunk/code.ts

Lines changed: 21 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,13 @@
11
import { SyntaxNode } from "web-tree-sitter";
2-
import { ChunkWithoutID } from "../..";
3-
import { countTokens } from "../../llm/countTokens";
4-
import { getParserForFile } from "../../util/treeSitter";
2+
import { ChunkWithoutID } from "../../index.js";
3+
import { countTokens } from "../../llm/countTokens.js";
4+
import { getParserForFile } from "../../util/treeSitter.js";
55

66
function collapsedReplacement(node: SyntaxNode): string {
77
if (node.type === "statement_block") {
88
return "{ ... }";
9-
} else {
10-
return "...";
119
}
10+
return "...";
1211
}
1312

1413
function firstChild(
@@ -19,9 +18,8 @@ function firstChild(
1918
return (
2019
node.children.find((child) => grammarName.includes(child.type)) || null
2120
);
22-
} else {
23-
return node.children.find((child) => child.type === grammarName) || null;
2421
}
22+
return node.children.find((child) => child.type === grammarName) || null;
2523
}
2624

2725
function collapseChildren(
@@ -96,6 +94,14 @@ function collapseChildren(
9694
return code;
9795
}
9896

97+
/**
 * Node type names that identify the body of a function.
 *
 * Passed by `constructClassDefinitionChunk` in place of the literal
 * `["block", "statement_block"]` it used before.
 * NOTE(review): presumably matched against tree-sitter `SyntaxNode.type`
 * values — confirm against the consumer.
 */
export const FUNCTION_BLOCK_NODE_TYPES = [
  "block",
  "statement_block",
];

/**
 * Node type names that identify a function or method declaration.
 *
 * Passed by `constructClassDefinitionChunk` in place of its former literal
 * list; compared to that list this one additionally contains
 * `"function_declaration"`.
 *
 * NOTE(review): the trailing lowercase `s` in the identifier looks like a
 * typo, but the name is exported, so it is kept for compatibility.
 */
export const FUNCTION_DECLARATION_NODE_TYPEs = [
  "method_definition",
  "function_definition",
  "function_item",
  "function_declaration",
];
104+
99105
function constructClassDefinitionChunk(
100106
node: SyntaxNode,
101107
code: string,
@@ -105,8 +111,8 @@ function constructClassDefinitionChunk(
105111
node,
106112
code,
107113
["block", "class_body", "declaration_list"],
108-
["method_definition", "function_definition", "function_item"],
109-
["block", "statement_block"],
114+
FUNCTION_DECLARATION_NODE_TYPEs,
115+
FUNCTION_BLOCK_NODE_TYPES,
110116
maxChunkSize,
111117
);
112118
}
@@ -130,12 +136,10 @@ function constructFunctionDefinitionChunk(
130136
// If inside a class, include the class header
131137
const classNode = node.parent.parent;
132138
const classBlock = node.parent;
133-
return (
134-
code.slice(classNode.startIndex, classBlock.startIndex) +
135-
"...\n\n" +
136-
" ".repeat(node.startPosition.column) + // ...
137-
funcText
138-
);
139+
return `${code.slice(
140+
classNode.startIndex,
141+
classBlock.startIndex,
142+
)}...\n\n${" ".repeat(node.startPosition.column)}${funcText}`;
139143
}
140144
return funcText;
141145
}
@@ -200,7 +204,7 @@ export async function* codeChunker(
200204
return;
201205
}
202206

203-
let parser = await getParserForFile(filepath);
207+
const parser = await getParserForFile(filepath);
204208
if (parser === undefined) {
205209
console.warn(`Failed to load parser for file ${filepath}: `);
206210
return;
@@ -209,4 +213,4 @@ export async function* codeChunker(
209213
const tree = parser.parse(contents);
210214

211215
yield* getSmartCollapsedChunks(tree.rootNode, contents, maxChunkSize);
212-
}
216+
}

core/util/ranges.ts

Lines changed: 69 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { Range } from "..";
1+
import { Position, Range } from "../index.js";
22

33
export function getRangeInString(content: string, range: Range): string {
44
const lines = content.split("\n");
@@ -23,3 +23,71 @@ export function getRangeInString(content: string, range: Range): string {
2323

2424
return [firstLine, ...middleLines, lastLine].join("\n");
2525
}
26+
27+
/**
 * Computes the overlap of two ranges.
 *
 * Positions are ordered by line first and by character second. The overlap
 * starts at the later of the two start positions and ends at the earlier of
 * the two end positions.
 *
 * Both inputs are assumed to be well-formed (start not after end).
 *
 * @param a - First range.
 * @param b - Second range.
 * @returns A newly allocated range covering the positions shared by `a` and
 *   `b`, or `null` when they do not overlap. Ranges that merely touch yield a
 *   zero-width range rather than `null`.
 */
export function intersection(a: Range, b: Range): Range | null {
  // Pick the later start by comparing whole positions. Comparing lines and
  // characters independently is wrong: the start character of a range that
  // begins on an earlier line says nothing about the line where the overlap
  // begins, and when both begin on the same line the larger character wins.
  const aStartsLater =
    a.start.line > b.start.line ||
    (a.start.line === b.start.line &&
      a.start.character >= b.start.character);
  const start = aStartsLater ? a.start : b.start;

  // Pick the earlier end, with the same whole-position comparison.
  const aEndsEarlier =
    a.end.line < b.end.line ||
    (a.end.line === b.end.line && a.end.character <= b.end.character);
  const end = aEndsEarlier ? a.end : b.end;

  // Empty overlap: the candidate start lies strictly after the candidate end.
  if (
    start.line > end.line ||
    (start.line === end.line && start.character > end.character)
  ) {
    return null;
  }

  // Copy the fields so the result never aliases the inputs.
  return {
    start: { line: start.line, character: start.character },
    end: { line: end.line, character: end.character },
  };
}
59+
60+
/**
 * Computes the smallest single range that covers both inputs.
 *
 * Positions are ordered by line first and by character second. The result
 * starts at the earlier of the two start positions and ends at the later of
 * the two end positions; any gap between the inputs is included.
 *
 * @param a - First range.
 * @param b - Second range.
 * @returns A newly allocated range spanning `a` and `b`.
 */
export function union(a: Range, b: Range): Range {
  // Earlier start. When both ranges start on the same line the smaller
  // character must win; always taking `a`'s character would cut off `b`.
  const aStartsEarlier =
    a.start.line < b.start.line ||
    (a.start.line === b.start.line &&
      a.start.character <= b.start.character);
  const start = aStartsEarlier ? a.start : b.start;

  // Later end, with the same tie-breaking on a shared line.
  const aEndsLater =
    a.end.line > b.end.line ||
    (a.end.line === b.end.line && a.end.character >= b.end.character);
  const end = aEndsLater ? a.end : b.end;

  // Copy the fields so the result never aliases the inputs.
  return {
    start: { line: start.line, character: start.character },
    end: { line: end.line, character: end.character },
  };
}
74+
75+
/**
 * Returns whichever of two positions comes later.
 *
 * Lines are compared first; characters are only consulted when neither line
 * is greater than the other. One of the two argument objects is returned
 * as-is (no copy). When the positions are equal, `b` is returned.
 *
 * @param a - First position.
 * @param b - Second position.
 * @returns `a` or `b`, whichever is later.
 */
export function maxPosition(a: Position, b: Position): Position {
  const onSameLine = !(a.line > b.line) && !(a.line < b.line);

  if (onSameLine) {
    // Tie on line: the larger character wins, `b` on a full tie.
    return a.character > b.character ? a : b;
  }

  return a.line > b.line ? a : b;
}
84+
85+
/**
 * Returns whichever of two positions comes earlier.
 *
 * Lines are compared first; characters are only consulted when neither line
 * is smaller than the other. One of the two argument objects is returned
 * as-is (no copy). When the positions are equal, `b` is returned.
 *
 * @param a - First position.
 * @param b - Second position.
 * @returns `a` or `b`, whichever is earlier.
 */
export function minPosition(a: Position, b: Position): Position {
  const onSameLine = !(a.line < b.line) && !(a.line > b.line);

  if (onSameLine) {
    // Tie on line: the smaller character wins, `b` on a full tie.
    return a.character < b.character ? a : b;
  }

  return a.line < b.line ? a : b;
}

0 commit comments

Comments
 (0)