forked from github/github-mcp-server
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathAgentic.ts
More file actions
59 lines (48 loc) · 1.93 KB
/
Agentic.ts
File metadata and controls
59 lines (48 loc) · 1.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
// Pseudocode for integrating self-adapting behavior (self-generated edits + periodic SFT) into an agent framework
/**
 * Agent that reflects on each interaction, stores the resulting "self-edit",
 * and fine-tunes its model once enough self-edits have accumulated.
 *
 * Flow per interaction: generate a response, ask the model for an improved
 * version (the self-edit), buffer it, and when the buffer reaches
 * `sftThreshold` entries, fine-tune on the buffered edits.
 */
class SelfAdaptingAgent {
  /** Self-edits collected since the last successful fine-tuning run. */
  private selfEditBuffer: Array<SelfEdit> = [];
  /** Number of buffered self-edits that triggers a fine-tuning run. */
  private readonly sftThreshold: number;
  /** Backing model; optional only so that zero-argument construction keeps working. */
  private model?: LLM;

  /**
   * @param model Model used for responses, self-edits and fine-tuning.
   *   Optional for backward compatibility; using the agent without a model throws.
   * @param sftThreshold Train after this many buffered self-edits (default 10).
   * @throws RangeError if `sftThreshold` is not a positive integer.
   */
  constructor(model?: LLM, sftThreshold = 10) {
    if (!Number.isInteger(sftThreshold) || sftThreshold < 1) {
      throw new RangeError(
        `sftThreshold must be a positive integer, got ${sftThreshold}`
      );
    }
    this.model = model;
    this.sftThreshold = sftThreshold;
  }

  /**
   * Answers `input`, records a self-edit for the exchange, and triggers
   * fine-tuning when the buffer has reached the threshold.
   *
   * @param input Text passed to the model.
   * @param context Extra data forwarded to the model and embedded (as JSON)
   *   in the reflection prompt.
   * @returns The model's response to `input`.
   * @throws Whatever the model calls throw; a failed self-edit or fine-tuning
   *   run rejects this call as well.
   */
  async handleInteraction(input: string, context: any): Promise<string> {
    const response = await this.requireModel().generateResponse(input, context);
    // ... (User interacts, provides feedback, or agent encounters a novel situation)

    // Generate the self-edit; this is asynchronous because it calls the model.
    const selfEdit = await this.generateSelfEdit(input, response, context);
    this.selfEditBuffer.push(selfEdit);

    // Check if ready for SFT. performSFT() empties the buffer itself.
    if (this.selfEditBuffer.length >= this.sftThreshold) {
      await this.performSFT();
    }
    return response;
  }

  /**
   * Asks the model to critique one interaction and produce an improved output.
   * This is the core "self-generation" step.
   *
   * @returns The input, the original output, and the model-generated edit.
   */
  private async generateSelfEdit(
    input: string,
    output: string,
    context: any
  ): Promise<SelfEdit> {
    // Built with join() so the prompt text is independent of source indentation;
    // the leading and trailing '' entries produce the surrounding newlines.
    const prompt = [
      '',
      'You are an AI agent reflecting on a recent interaction.',
      `Input: ${input}`,
      `Your Output: ${output}`,
      `Context: ${JSON.stringify(context)}`,
      'What is one way you could improve your response? Generate a corrected version or specify a hyperparameter change.',
      '',
    ].join('\n');
    const edit = await this.requireModel().generate(prompt); // This generates the "self-edit"
    return { input, original_output: output, improved_output: edit };
  }

  /**
   * Fine-tunes the model on the buffered self-edits and empties the buffer.
   *
   * The batch is detached from the buffer *before* awaiting, so self-edits
   * pushed by overlapping interactions while training is in flight are kept
   * for the next run instead of being wiped. If fine-tuning fails, the batch
   * is put back at the front of the buffer and the error is rethrown.
   */
  private async performSFT(): Promise<void> {
    const batch = this.selfEditBuffer;
    this.selfEditBuffer = [];

    // Use the detached batch to create a fine-tuning dataset
    const dataset = batch.map((edit) => ({
      prompt: edit.input,
      completion: edit.improved_output,
    }));

    try {
      // Perform lightweight SFT on the model
      await this.requireModel().fineTune(dataset);
    } catch (error) {
      this.selfEditBuffer = [...batch, ...this.selfEditBuffer];
      throw error;
    }
    // Optional: Log performance before/after for reward signal
    // await this.evaluatePerformance();
  }

  /** Returns the configured model, or throws a descriptive error if none was set. */
  private requireModel(): LLM {
    if (this.model == null) {
      throw new Error(
        'SelfAdaptingAgent: no model configured; pass one to the constructor'
      );
    }
    return this.model;
  }
}
/**
 * A single self-generated training example: one interaction paired with the
 * model's own suggested improvement.
 */
interface SelfEdit {
  /** The input of the interaction being reflected on. */
  input: string;

  /** The output originally produced for that input. */
  original_output: string;

  /** The improved output (or suggested change) generated during reflection. */
  improved_output: string;
}