Skip to content

Commit 93f7c57

Browse files
authored
Merge pull request #6179 from ethereum/completion_ratelimit_and_caching
Completion ratelimit and caching
2 parents a631fda + 82158d2 commit 93f7c57

File tree

8 files changed

+775
-84
lines changed

8 files changed

+775
-84
lines changed

apps/remix-ide-e2e/src/tests/ai_panel.test.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ module.exports = {
155155
.assistantWorkspace('comment all function', 'mistralai')
156156
.waitForElementVisible({
157157
locateStrategy: 'xpath',
158-
selector: '//div[contains(@class,"chat-bubble") and (contains(.,"Modified Files") or contains(.,"No Changes applied"))]',
158+
selector: '//div[contains(@class,"chat-bubble") and (contains(.,"Modified Files") or contains(.,"No Changes applied") or contains(.,"No files modified"))]',
159159
timeout: 60000
160160
})
161161
.waitForElementPresent({
@@ -203,7 +203,7 @@ module.exports = {
203203
.assistantWorkspace('remove all comments', 'openai')
204204
.waitForElementVisible({
205205
locateStrategy: 'xpath',
206-
selector: '//div[contains(@class,"chat-bubble") and (contains(.,"Modified Files") or contains(.,"No Changes applied"))]',
206+
selector: '//div[contains(@class,"chat-bubble") and (contains(.,"Modified Files") or contains(.,"No Changes applied") or contains(.,"No files modified"))]',
207207
timeout: 60000
208208
})
209209
.waitForElementPresent({
@@ -223,7 +223,7 @@ module.exports = {
223223
.assistantWorkspace('remove all comments', 'anthropic')
224224
.waitForElementVisible({
225225
locateStrategy: 'xpath',
226-
selector: '//div[contains(@class,"chat-bubble") and (contains(.,"Modified Files") or contains(.,"No Changes applied"))]',
226+
selector: '//div[contains(@class,"chat-bubble") and (contains(.,"Modified Files") or contains(.,"No Changes applied") or contains(.,"No files modified"))]',
227227
timeout: 60000
228228
})
229229
.waitForElementPresent({

libs/remix-ai-core/src/inferencers/remote/remoteInference.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ export class RemoteInferencer implements ICompletions, IGeneration {
2626
const requestURL = rType === AIRequestType.COMPLETION ? this.completion_url : this.api_url
2727

2828
try {
29-
const options = { headers: { 'Content-Type': 'application/json', } }
29+
const options = AIRequestType.COMPLETION ? { headers: { 'Content-Type': 'application/json', }, timeout: 3000 } : { headers: { 'Content-Type': 'application/json', } }
3030
const result = await axios.post(requestURL, payload, options)
3131

3232
switch (rType) {
@@ -49,6 +49,7 @@ export class RemoteInferencer implements ICompletions, IGeneration {
4949
} catch (e) {
5050
ChatHistory.clearHistory()
5151
console.error('Error making request to Inference server:', e.message)
52+
return ""
5253
}
5354
finally {
5455
this.event.emit("onInferenceDone")
@@ -110,7 +111,7 @@ export class RemoteInferencer implements ICompletions, IGeneration {
110111
}
111112

112113
async code_completion(prompt, promptAfter, ctxFiles, fileName, options:IParams=CompletionParams): Promise<any> {
113-
options.max_tokens = 10
114+
options.max_tokens = 30
114115
const payload = { prompt, 'context':promptAfter, "endpoint":"code_completion",
115116
'ctxFiles':ctxFiles, 'currentFileName':fileName, ...options }
116117
return this._makeRequest(payload, AIRequestType.COMPLETION)
Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
/**
2+
* Adaptive Rate Limiter for Inline Completions
3+
*
4+
*/
5+
6+
/** One inline completion that was shown to the user. */
interface CompletionHistoryEntry {
  /** Epoch milliseconds at which the completion was shown. */
  timestamp: number;
  /** Whether the user accepted this completion. */
  accepted: boolean;
}

/** Optional overrides for the limiter's timing parameters (all in milliseconds). */
interface AdaptiveRateLimiterOptions {
  /** Minimum gap between two requests. */
  minRequestInterval?: number;
  /** Lower bound of the cooldown applied after a completion. */
  completionCooldown?: number;
  /** Age after which shown completions no longer count towards the acceptance rate. */
  historyWindow?: number;
  /** Base value the adaptive cooldown is scaled from. */
  baseAdaptiveCooldown?: number;
  /** Upper bound of the adaptive part of the cooldown. */
  maxAdaptiveCooldown?: number;
}

/** Snapshot returned by {@link AdaptiveRateLimiter.getStats}. */
interface AdaptiveRateLimiterStats {
  acceptanceRate: number;
  totalCompletions: number;
  acceptedCompletions: number;
  rejectedCompletions: number;
  currentCooldown: number;
}

/**
 * Rate limiter for inline completions whose post-completion cooldown adapts
 * to how often recently shown completions were accepted: a higher acceptance
 * rate gives a shorter cooldown, a lower one gives a longer cooldown.
 *
 * Every method that depends on the current time takes an optional
 * `currentTime` (epoch milliseconds, defaulting to `Date.now()`), so one
 * clock value can be used consistently for a whole decision.
 */
export class AdaptiveRateLimiter {
  private lastRequestTime = 0;
  private lastCompletionTime = 0;
  // 0.5 is the neutral rate used whenever there is no recent history.
  private acceptanceRate = 0.5;
  private totalCompletions = 0;
  private acceptedCompletions = 0;
  private rejectedCompletions = 0;
  private recentCompletionHistory: CompletionHistoryEntry[] = [];

  private readonly minRequestInterval: number = 500;
  private readonly completionCooldown: number = 2000;
  private readonly historyWindow: number = 300000; // 5 minutes
  private readonly baseAdaptiveCooldown: number = 1000;
  private readonly maxAdaptiveCooldown: number = 10000;

  /**
   * @param options Timing overrides; any omitted field keeps its default.
   */
  constructor(options?: AdaptiveRateLimiterOptions) {
    if (options) {
      this.minRequestInterval = options.minRequestInterval ?? this.minRequestInterval;
      this.completionCooldown = options.completionCooldown ?? this.completionCooldown;
      this.historyWindow = options.historyWindow ?? this.historyWindow;
      this.baseAdaptiveCooldown = options.baseAdaptiveCooldown ?? this.baseAdaptiveCooldown;
      this.maxAdaptiveCooldown = options.maxAdaptiveCooldown ?? this.maxAdaptiveCooldown;
    }
  }

  /**
   * Decides whether a new completion request may be issued.
   *
   * @param currentTime Clock value used for both interval checks and for
   *   pruning the acceptance history.
   * @returns `false` when the minimum request interval has not elapsed since
   *   the last recorded request, or when the adaptive cooldown has not elapsed
   *   since the last recorded completion; `true` otherwise.
   */
  shouldAllowRequest(currentTime: number = Date.now()): boolean {
    // Refresh the cooldown first (as the original did) so the acceptance
    // history is pruned on every call, even when the request is then blocked.
    const adaptiveCooldown = this.getAdaptiveCooldown(currentTime);

    if (currentTime - this.lastRequestTime < this.minRequestInterval) {
      return false;
    }

    return currentTime - this.lastCompletionTime >= adaptiveCooldown;
  }

  /** Records the time at which a request was issued. */
  recordRequest(currentTime: number = Date.now()): void {
    this.lastRequestTime = currentTime;
  }

  /** Records the time at which a completion was produced. */
  recordCompletion(currentTime: number = Date.now()): void {
    this.lastCompletionTime = currentTime;
  }

  /**
   * Registers that a completion was shown; it counts as not accepted until
   * {@link trackCompletionAccepted} is called.
   *
   * @param currentTime Timestamp stored with the history entry.
   */
  trackCompletionShown(currentTime: number = Date.now()): void {
    this.totalCompletions++;
    this.recentCompletionHistory.push({ timestamp: currentTime, accepted: false });
  }

  /**
   * Registers an acceptance and marks the most recently shown completion
   * (if any is still in the history) as accepted.
   */
  trackCompletionAccepted(): void {
    this.acceptedCompletions++;

    const latest = this.recentCompletionHistory[this.recentCompletionHistory.length - 1];
    if (latest) {
      latest.accepted = true;
    }
  }

  /**
   * Registers a rejection. Only the counter changes: history entries are
   * created as not accepted, so the acceptance rate already reflects it.
   */
  trackCompletionRejected(): void {
    this.rejectedCompletions++;
  }

  /**
   * Returns the counters together with the current acceptance rate and
   * cooldown.
   *
   * @param currentTime Clock value used to prune the history before the
   *   snapshot is taken.
   */
  getStats(currentTime: number = Date.now()): AdaptiveRateLimiterStats {
    // Compute the cooldown first: it refreshes `acceptanceRate`, so the rate
    // reported below always matches the cooldown in the same snapshot.
    const currentCooldown = this.getAdaptiveCooldown(currentTime);

    return {
      acceptanceRate: this.acceptanceRate,
      totalCompletions: this.totalCompletions,
      acceptedCompletions: this.acceptedCompletions,
      rejectedCompletions: this.rejectedCompletions,
      currentCooldown
    };
  }

  /**
   * Computes the cooldown to apply after a completion, refreshing the
   * acceptance rate as a side effect.
   */
  private getAdaptiveCooldown(currentTime: number): number {
    this.updateAcceptanceRate(currentTime);

    // Higher acceptance rate = shorter cooldown, lower acceptance rate =
    // longer cooldown. The factor never drops below 0.1.
    const adaptiveFactor = Math.max(0.1, 1 - this.acceptanceRate);
    const adaptiveCooldown = Math.min(
      this.maxAdaptiveCooldown,
      this.baseAdaptiveCooldown + this.baseAdaptiveCooldown * adaptiveFactor * 5
    );

    // The fixed completion cooldown is the floor.
    return Math.max(this.completionCooldown, adaptiveCooldown);
  }

  /**
   * Drops history entries older than the history window and recomputes the
   * acceptance rate from what remains.
   */
  private updateAcceptanceRate(currentTime: number): void {
    this.recentCompletionHistory = this.recentCompletionHistory.filter(
      entry => currentTime - entry.timestamp < this.historyWindow
    );

    const shown = this.recentCompletionHistory.length;
    if (shown === 0) {
      // No recent history: fall back to the neutral rate so that nobody is
      // penalized at startup.
      this.acceptanceRate = 0.5;
      return;
    }

    const accepted = this.recentCompletionHistory.filter(entry => entry.accepted).length;
    this.acceptanceRate = accepted / shown;
  }
}

0 commit comments

Comments
 (0)