Skip to content

Commit aafa578

Browse files
Merge pull request #310 from olasunkanmi-SE/feature_enhancement
feat: implement AST indexing service with worker thread support and c…
2 parents 9898ced + 666db40 commit aafa578

File tree

17 files changed

+470
-134
lines changed

17 files changed

+470
-134
lines changed

esbuild.js

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,30 @@ async function main() {
184184
plugins: [nodeModulesPlugin, treeShakingPlugin],
185185
});
186186

187+
// Worker bundle
188+
const workerCtx = await esbuild.context({
189+
entryPoints: ["src/workers/ast-analyzer.worker.ts"],
190+
bundle: true,
191+
external: [
192+
"vscode",
193+
"better-sqlite3",
194+
"electron",
195+
"@lancedb/lancedb",
196+
"apache-arrow",
197+
"web-tree-sitter",
198+
"@vscode/ripgrep",
199+
],
200+
format: "cjs",
201+
target: "node16",
202+
platform: "node",
203+
minify: production,
204+
sourcemap: !production,
205+
outfile: "dist/workers/ast-analyzer.worker.js",
206+
metafile: true,
207+
logLevel: "info",
208+
plugins: [nodeModulesPlugin, treeShakingPlugin],
209+
});
210+
187211
// Webview bundle
188212
const webviewCtx = await esbuild.context({
189213
entryPoints: ["webviewUi/src/main.tsx"],
@@ -238,14 +262,16 @@ async function main() {
238262
} else {
239263
console.log("🚀 Building...");
240264
const startTime = Date.now();
241-
const [mainResult, webviewResult] = await Promise.all([
265+
const [mainResult, workerResult, webviewResult] = await Promise.all([
242266
mainCtx.rebuild(),
267+
workerCtx.rebuild(),
243268
webviewCtx.rebuild(),
244269
]);
245270
const duration = Date.now() - startTime;
246271
console.log(`\n✨ Build completed in ${duration}ms`);
247272
if (production) {
248273
const mainSize = fs.statSync("dist/extension.js").size / 1024;
274+
const workerSize = fs.statSync("dist/workers/ast-analyzer.worker.js").size / 1024;
249275
const webviewSize = fs
250276
.readdirSync("dist/webview")
251277
.filter((f) => f.endsWith(".js"))
@@ -256,9 +282,11 @@ async function main() {
256282
);
257283
console.log("\n📦 Bundle sizes:");
258284
console.log(` Extension: ${mainSize.toFixed(2)}KB`);
285+
console.log(` Worker: ${workerSize.toFixed(2)}KB`);
259286
console.log(` Webview: ${webviewSize.toFixed(2)}KB`);
260287
}
261288
await mainCtx.dispose();
289+
await workerCtx.dispose();
262290
await webviewCtx.dispose();
263291
}
264292
} catch (error) {

package.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -484,6 +484,11 @@
484484
"default": true,
485485
"description": "Enable streaming responses from AI models"
486486
},
487+
"codebuddy.compactMode": {
488+
"type": "boolean",
489+
"default": false,
490+
"description": "Reduce spacing between messages for a denser view"
491+
},
487492
"codebuddy.nickname": {
488493
"type": "string",
489494
"default": "",

src/application/constant.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ export const APP_CONFIG = {
7171
chatview: "chatview.theme",
7272
chatviewFont: "chatview.font.size",
7373
tavilyApiKey: "tavily.apiKey",
74+
compactMode: "codebuddy.compactMode",
7475
};
7576

7677
export enum generativeAiModels {

src/commands/index-workspace.ts

Lines changed: 11 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import * as vscode from "vscode";
2-
import { ContextRetriever } from "../services/context-retriever";
2+
import { AstIndexingService } from "../services/ast-indexing.service";
33
import { Logger, LogLevel } from "../infrastructure/logger/logger";
44

55
const logger = Logger.initialize("IndexWorkspace", {
@@ -10,11 +10,11 @@ const logger = Logger.initialize("IndexWorkspace", {
1010
});
1111

1212
export async function indexWorkspaceCommand(): Promise<void> {
13-
const contextRetriever = ContextRetriever.initialize();
13+
const astIndexer = AstIndexingService.getInstance(); // Singleton already initialized
1414

1515
// Find all files, excluding common ignored folders
1616
const files = await vscode.workspace.findFiles(
17-
"**/*",
17+
"**/*.{ts,js,tsx,jsx,py,java,go,rs,cpp,h,c}", // Limit to code files for AST analysis
1818
"**/{node_modules,.git,dist,out,build,coverage,.codebuddy}/**",
1919
);
2020

@@ -26,39 +26,34 @@ export async function indexWorkspaceCommand(): Promise<void> {
2626
vscode.window.withProgress(
2727
{
2828
location: vscode.ProgressLocation.Notification,
29-
title: "Indexing Workspace",
29+
title: "Indexing Workspace (Background Worker)",
3030
cancellable: true,
3131
},
3232
async (progress, token) => {
3333
let processed = 0;
3434
const total = files.length;
3535

3636
for (const file of files) {
37-
if (token.isCancellationRequested) {
38-
break;
39-
}
37+
if (token.isCancellationRequested) break;
4038

4139
try {
4240
progress.report({
43-
message: `Indexing ${processed}/${total}: ${vscode.workspace.asRelativePath(file)}`,
41+
message: `Queueing ${processed}/${total}: ${vscode.workspace.asRelativePath(file)}`,
4442
increment: (1 / total) * 100,
4543
});
44+
4645
const document = await vscode.workspace.openTextDocument(file);
47-
// Skip very large files or binary files if possible (openTextDocument handles binary check implicitly by failing or returning weird stuff, but we can check languageId?)
48-
if (document.lineCount > 5000) {
49-
logger.warn(`Skipping large file: ${file.fsPath}`);
50-
continue;
51-
}
52-
await contextRetriever.indexFile(file.fsPath, document.getText());
46+
// Offload to worker
47+
astIndexer.indexFile(file.fsPath, document.getText());
5348
} catch (error) {
54-
logger.error(`Failed to index file: ${file.fsPath}`, error);
49+
logger.error(`Failed to queue file: ${file.fsPath}`, error);
5550
}
5651

5752
processed++;
5853
}
5954

6055
vscode.window.showInformationMessage(
61-
`Workspace indexing complete. Indexed ${processed} files.`,
56+
`Workspace indexing queued for ${processed} files. Check logs for completion.`,
6257
);
6358
},
6459
);

src/extension.ts

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ import { AgentRunningGuardService } from "./services/agent-running-guard.service
5757

5858
import { DiffReviewService } from "./services/diff-review.service";
5959
import { SecretStorageService } from "./services/secret-storage";
60+
import { AstIndexingService } from "./services/ast-indexing.service";
6061

6162
const logger = Logger.initialize("extension-main", {
6263
minLevel: LogLevel.DEBUG,
@@ -207,6 +208,10 @@ export async function activate(context: vscode.ExtensionContext) {
207208
const terminal = Terminal.getInstance();
208209
terminal.setExtensionPath(context.extensionPath);
209210

211+
// Initialize AST Indexing Service (Worker Thread Manager)
212+
AstIndexingService.getInstance(context);
213+
logger.info("AST Indexing Service initialized");
214+
210215
new DeveloperAgent({});
211216
const selectedGenerativeAiModel = getConfigValue("generativeAi.option");
212217
// setConfigValue("generativeAi.option", "Gemini");
@@ -227,9 +232,9 @@ export async function activate(context: vscode.ExtensionContext) {
227232
// Initialize ContextRetriever for semantic search
228233
const contextRetriever = ContextRetriever.initialize(context);
229234

230-
// Auto-index files on save
235+
// Auto-index files on save using the optimized Worker Service
231236
context.subscriptions.push(
232-
vscode.workspace.onDidSaveTextDocument(async (document) => {
237+
vscode.workspace.onDidSaveTextDocument((document) => {
233238
// Skip irrelevant files
234239
if (
235240
document.languageId === "git-commit" ||
@@ -242,8 +247,8 @@ export async function activate(context: vscode.ExtensionContext) {
242247
}
243248

244249
try {
245-
// Index the updated file content
246-
await contextRetriever.indexFile(
250+
// Offload indexing to the worker thread
251+
AstIndexingService.getInstance().indexFile(
247252
document.fileName,
248253
document.getText(),
249254
);
src/services/ast-indexing.service.ts

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
import * as vscode from "vscode";
2+
import { Worker } from "worker_threads";
3+
import * as path from "path";
4+
import { Logger } from "../infrastructure/logger/logger";
5+
import { SimpleVectorStore } from "./simple-vector-store";
6+
import { EmbeddingService } from "./embedding";
7+
import { getAPIKeyAndModel } from "../utils/utils";
8+
9+
/** Document shape accepted by the vector store; derived so chunk typing follows the store. */
type VectorStoreDocument = Parameters<SimpleVectorStore["addDocument"]>[0];

/** A chunk as read from a worker "RESULT" message; the embedding vector is added here. */
interface AstChunk {
  id: string;
  text: string;
  metadata: VectorStoreDocument["metadata"];
}

/** Narrows an unknown value to a plain object so its properties can be inspected safely. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null;
}

/**
 * Singleton that forwards file contents to the AST analyzer worker thread and
 * stores the chunks the worker returns (after embedding them) in the vector store.
 *
 * Call `getInstance(context)` once with the extension context; later calls may
 * omit the context and receive the same instance.
 */
export class AstIndexingService {
  private static instance: AstIndexingService | undefined;
  private static readonly WORKER_FILE = "ast-analyzer.worker.js";

  private worker: Worker | undefined;
  private readonly logger: Logger;
  private readonly vectorStore: SimpleVectorStore;
  private readonly embeddingService: EmbeddingService;

  /**
   * Builds the vector store and embedding service, then starts the worker.
   *
   * @param context Extension context, passed to the vector store and used to
   *   locate the worker bundle.
   */
  constructor(context: vscode.ExtensionContext) {
    this.logger = Logger.initialize("AstIndexingService", {});
    this.vectorStore = new SimpleVectorStore(context);

    // Initialize embedding service
    const { apiKey } = getAPIKeyAndModel("gemini");
    this.embeddingService = new EmbeddingService(apiKey);

    this.initializeWorker(context);
  }

  /**
   * Returns the shared instance, creating it on first use.
   *
   * @param context Required on the first call only.
   * @returns The singleton service.
   * @throws Error if no instance exists yet and `context` is omitted.
   */
  public static getInstance(
    context?: vscode.ExtensionContext,
  ): AstIndexingService {
    if (!AstIndexingService.instance) {
      if (!context) {
        throw new Error(
          "AstIndexingService not initialized. Context required for first initialization.",
        );
      }
      AstIndexingService.instance = new AstIndexingService(context);
    }
    return AstIndexingService.instance;
  }

  /**
   * Resolves the absolute path of the worker bundle (dist for prod, out for dev).
   *
   * The path is anchored at the extension root instead of being relative to
   * `__dirname`: when this code runs from a bundle placed directly in `dist/`,
   * a `../workers/...` path would point outside `dist/`, while the worker is
   * emitted at `dist/workers/`. Matching the first path segment below the
   * extension root also avoids treating any path that merely contains the
   * substring "dist" as a production build.
   */
  private resolveWorkerPath(context: vscode.ExtensionContext): string {
    const [topLevelDir] = path
      .relative(context.extensionPath, __dirname)
      .split(path.sep);
    const outputDir = topLevelDir === "dist" ? "dist" : "out";

    return path.join(
      context.extensionPath,
      outputDir,
      "workers",
      AstIndexingService.WORKER_FILE,
    );
  }

  /** Starts the worker thread and wires up its message, error and exit handlers. */
  private initializeWorker(context: vscode.ExtensionContext): void {
    const workerPath = this.resolveWorkerPath(context);

    try {
      this.logger.info(`Initializing Indexing Worker at: ${workerPath}`);
      const worker = new Worker(workerPath);
      this.worker = worker;

      // The handler is async; catch here so a failure never becomes an
      // unhandled promise rejection in the extension host.
      worker.on("message", (message: unknown) => {
        this.handleWorkerMessage(message).catch((err: unknown) =>
          this.logger.error("Failed to handle worker message", err),
        );
      });
      worker.on("error", (err) => this.logger.error("Worker error:", err));
      worker.on("exit", (code) => {
        // Drop the handle so indexFile() stops posting to a dead worker.
        if (this.worker === worker) {
          this.worker = undefined;
        }
        if (code !== 0) {
          this.logger.error(`Worker stopped with exit code ${code}`);
          // Restart specific worker logic could go here
        }
      });
    } catch (error) {
      this.logger.error("Failed to initialize worker", error);
    }
  }

  /**
   * Handles a message posted by the worker.
   *
   * "RESULT" messages carry `data.filePath` and `data.chunks`, which are
   * embedded and persisted; "ERROR" messages are logged. Anything else is ignored.
   */
  private async handleWorkerMessage(message: unknown): Promise<void> {
    if (!isRecord(message)) {
      return;
    }

    if (message.type === "ERROR") {
      this.logger.error("Worker processing error", message.error);
      return;
    }

    if (message.type !== "RESULT") {
      return;
    }

    const data = message.data;
    if (!isRecord(data) || !Array.isArray(data.chunks)) {
      this.logger.error("Worker processing error", "Malformed RESULT payload");
      return;
    }

    const chunks = data.chunks as AstChunk[];
    this.logger.info(
      `Worker finished file: ${String(data.filePath)}, generated ${chunks.length} chunks`,
    );

    // Embedding requests are awaited one at a time on the main thread.
    await this.processChunks(chunks);
  }

  /**
   * Generates an embedding for each chunk and stores it in the vector store.
   * A failing chunk is logged and skipped so the remaining chunks still get indexed.
   */
  private async processChunks(chunks: AstChunk[]): Promise<void> {
    let persisted = 0;

    for (const chunk of chunks) {
      try {
        const embedding = await this.embeddingService.generateEmbedding(
          chunk.text,
        );
        if (!embedding) {
          continue;
        }
        await this.vectorStore.addDocument({
          id: chunk.id,
          text: chunk.text,
          vector: embedding,
          metadata: chunk.metadata,
        });
        persisted++;
      } catch (err) {
        // Covers both embedding generation and the vector store write.
        this.logger.warn(`Failed to index chunk ${chunk.id}`, err);
      }
    }

    // Report what was actually stored, not how many chunks were attempted.
    this.logger.info(
      `Persisted ${persisted}/${chunks.length} chunks to vector store`,
    );
  }

  /**
   * Sends a file to the worker for analysis. Returns immediately; results
   * arrive later through the worker's message handler.
   *
   * @param filePath Path of the file being indexed.
   * @param content Full text of the file.
   */
  public indexFile(filePath: string, content: string): void {
    if (!this.worker) {
      this.logger.warn(
        `Indexing worker unavailable; skipping file: ${filePath}`,
        undefined,
      );
      return;
    }

    // Send to worker
    this.worker.postMessage({
      type: "INDEX_FILE",
      data: { filePath, content },
    });
  }
}

src/services/news.service.ts

Lines changed: 41 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -37,28 +37,43 @@ export class NewsService {
3737
public async fetchAndStoreNews(): Promise<void> {
3838
try {
3939
await this.ensureInitialized();
40-
this.logger.info("Fetching news from Hacker News...");
40+
this.logger.info("Fetching curated news from Dev.to...");
4141

42-
// Get top stories IDs
43-
const { data: topStories } = await axios.get<number[]>(
44-
"https://hacker-news.firebaseio.com/v0/topstories.json",
45-
);
42+
// Define interests and distribution for 5 items
43+
// 2 AI/Agents, 2 Architecture, 1 Leadership
44+
const categories = [
45+
{ tag: "ai", count: 2 },
46+
{ tag: "architecture", count: 2 },
47+
{ tag: "leadership", count: 1 },
48+
];
4649

47-
// Get details for top 5 stories
48-
const top5 = topStories.slice(0, 5);
4950
const newsItems: NewsItem[] = [];
5051

51-
for (const id of top5) {
52-
const { data: story } = await axios.get(
53-
`https://hacker-news.firebaseio.com/v0/item/${id}.json`,
54-
);
55-
if (story && story.url) {
56-
newsItems.push({
57-
title: story.title,
58-
url: story.url,
59-
source: "Hacker News",
60-
published_at: new Date(story.time * 1000).toISOString(),
52+
for (const cat of categories) {
53+
try {
54+
const { data } = await axios.get("https://dev.to/api/articles", {
55+
params: {
56+
tag: cat.tag,
57+
per_page: cat.count,
58+
// 'fresh' ensures we get new articles, or we can use default (hot/rising)
59+
// using default to ensure quality
60+
},
61+
timeout: 5000,
6162
});
63+
64+
if (Array.isArray(data)) {
65+
data.forEach((article: any) => {
66+
newsItems.push({
67+
title: article.title,
68+
url: article.url,
69+
source: "Dev.to", // or `Dev.to (${cat.tag})`
70+
published_at: article.published_at || new Date().toISOString(),
71+
summary: article.description,
72+
});
73+
});
74+
}
75+
} catch (err) {
76+
this.logger.error(`Failed to fetch articles for tag ${cat.tag}`, err);
6277
}
6378
}
6479

@@ -73,9 +88,15 @@ export class NewsService {
7388

7489
if (existing.length === 0) {
7590
this.dbService.executeSqlCommand(
76-
`INSERT INTO news_items (title, url, source, published_at, read_status)
77-
VALUES (?, ?, ?, ?, 0)`,
78-
[item.title, item.url, item.source, item.published_at],
91+
`INSERT INTO news_items (title, url, source, published_at, read_status, summary)
92+
VALUES (?, ?, ?, ?, 0, ?)`,
93+
[
94+
item.title,
95+
item.url,
96+
item.source,
97+
item.published_at,
98+
item.summary || "",
99+
],
79100
);
80101
}
81102
}

0 commit comments

Comments
 (0)