Skip to content

Commit 8b242d9

Browse files
STetsingyann300
authored and committed
adding rate limit, caching mechanism and smart context filtering
1 parent a631fda commit 8b242d9

File tree

6 files changed

+660
-80
lines changed

6 files changed

+660
-80
lines changed

libs/remix-ai-core/src/inferencers/remote/remoteInference.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ export class RemoteInferencer implements ICompletions, IGeneration {
2626
const requestURL = rType === AIRequestType.COMPLETION ? this.completion_url : this.api_url
2727

2828
try {
29-
const options = { headers: { 'Content-Type': 'application/json', } }
29+
const options = { headers: { 'Content-Type': 'application/json', }, timeout: 2000 }
3030
const result = await axios.post(requestURL, payload, options)
3131

3232
switch (rType) {
@@ -49,6 +49,7 @@ export class RemoteInferencer implements ICompletions, IGeneration {
4949
} catch (e) {
5050
ChatHistory.clearHistory()
5151
console.error('Error making request to Inference server:', e.message)
52+
return ""
5253
}
5354
finally {
5455
this.event.emit("onInferenceDone")
@@ -110,7 +111,7 @@ export class RemoteInferencer implements ICompletions, IGeneration {
110111
}
111112

112113
async code_completion(prompt, promptAfter, ctxFiles, fileName, options:IParams=CompletionParams): Promise<any> {
113-
options.max_tokens = 10
114+
options.max_tokens = 30
114115
const payload = { prompt, 'context':promptAfter, "endpoint":"code_completion",
115116
'ctxFiles':ctxFiles, 'currentFileName':fileName, ...options }
116117
return this._makeRequest(payload, AIRequestType.COMPLETION)
Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
/**
2+
* Adaptive Rate Limiter for Inline Completions
3+
*
4+
*/
5+
6+
/** One completion that was shown to the user and whether it was accepted. */
interface CompletionHistoryEntry {
  /** `Date.now()` value captured when the completion was shown. */
  timestamp: number;
  accepted: boolean;
}

/** Optional overrides for the limiter's timing parameters (all in milliseconds). */
interface AdaptiveRateLimiterOptions {
  /** Minimum gap between two recorded requests. Default 500. */
  minRequestInterval?: number;
  /** Lower bound for the cooldown applied after a completion. Default 2000. */
  completionCooldown?: number;
  /** How far back shown completions count towards the acceptance rate. Default 300000. */
  historyWindow?: number;
  /** Base value the adaptive cooldown is scaled from. Default 1000. */
  baseAdaptiveCooldown?: number;
  /** Upper bound for the adaptive part of the cooldown. Default 10000. */
  maxAdaptiveCooldown?: number;
}

/** Snapshot returned by {@link AdaptiveRateLimiter.getStats}. */
interface AdaptiveRateLimiterStats {
  acceptanceRate: number;
  totalCompletions: number;
  acceptedCompletions: number;
  rejectedCompletions: number;
  currentCooldown: number;
}

/**
 * Rate limiter for inline completions whose post-completion cooldown adapts to
 * how often recently shown completions were accepted: the higher the acceptance
 * rate, the shorter the cooldown.
 */
export class AdaptiveRateLimiter {
  private lastRequestTime = 0;
  private lastCompletionTime = 0;
  private acceptanceRate = 0.5;
  private totalCompletions = 0;
  private acceptedCompletions = 0;
  private rejectedCompletions = 0;
  private recentCompletionHistory: CompletionHistoryEntry[] = [];

  private readonly minRequestInterval: number = 500;
  private readonly completionCooldown: number = 2000;
  private readonly historyWindow: number = 300000; // 5 minutes
  private readonly baseAdaptiveCooldown: number = 1000;
  private readonly maxAdaptiveCooldown: number = 10000;

  /**
   * @param options Per-field overrides; any field left undefined keeps its default.
   */
  constructor(options?: AdaptiveRateLimiterOptions) {
    if (options) {
      this.minRequestInterval = options.minRequestInterval ?? this.minRequestInterval;
      this.completionCooldown = options.completionCooldown ?? this.completionCooldown;
      this.historyWindow = options.historyWindow ?? this.historyWindow;
      this.baseAdaptiveCooldown = options.baseAdaptiveCooldown ?? this.baseAdaptiveCooldown;
      this.maxAdaptiveCooldown = options.maxAdaptiveCooldown ?? this.maxAdaptiveCooldown;
    }
  }

  /**
   * Decide whether a new completion request may be issued.
   *
   * @param currentTime Timestamp to evaluate against; defaults to `Date.now()`.
   * @returns `false` while either the minimum request interval or the adaptive
   *   cooldown since the last recorded completion has not elapsed yet.
   */
  shouldAllowRequest(currentTime: number = Date.now()): boolean {
    const timeSinceLastRequest = currentTime - this.lastRequestTime;
    if (timeSinceLastRequest < this.minRequestInterval) {
      return false;
    }

    const timeSinceLastCompletion = currentTime - this.lastCompletionTime;
    return timeSinceLastCompletion >= this.getAdaptiveCooldown();
  }

  /** Remember when the latest request was issued. */
  recordRequest(currentTime: number = Date.now()): void {
    this.lastRequestTime = currentTime;
  }

  /** Remember when the latest completion was produced; starts the cooldown. */
  recordCompletion(currentTime: number = Date.now()): void {
    this.lastCompletionTime = currentTime;
  }

  /** Register a completion as shown; it counts as not accepted until told otherwise. */
  trackCompletionShown(): void {
    this.totalCompletions++;
    this.recentCompletionHistory.push({
      timestamp: Date.now(),
      accepted: false
    });
  }

  /** Count an acceptance and flag the most recently shown completion as accepted. */
  trackCompletionAccepted(): void {
    this.acceptedCompletions++;

    const latest = this.recentCompletionHistory[this.recentCompletionHistory.length - 1];
    if (latest) {
      latest.accepted = true;
    }
  }

  /** Count a rejection. History entries already default to "not accepted". */
  trackCompletionRejected(): void {
    this.rejectedCompletions++;
  }

  /**
   * Refresh the acceptance rate and derive the cooldown from it.
   * Higher acceptance rate = shorter cooldown, lower acceptance rate = longer
   * cooldown; the result never drops below `completionCooldown`.
   */
  private getAdaptiveCooldown(): number {
    this.updateAcceptanceRate();

    // The factor is floored at 0.1 so a perfect acceptance rate still adds some delay.
    const adaptiveFactor = Math.max(0.1, 1 - this.acceptanceRate);
    const adaptiveCooldown = Math.min(
      this.maxAdaptiveCooldown,
      this.baseAdaptiveCooldown + (this.baseAdaptiveCooldown * adaptiveFactor * 5)
    );

    return Math.max(this.completionCooldown, adaptiveCooldown);
  }

  /** Drop history older than `historyWindow` and recompute the acceptance rate. */
  private updateAcceptanceRate(): void {
    const currentTime = Date.now();

    this.recentCompletionHistory = this.recentCompletionHistory.filter(
      entry => currentTime - entry.timestamp < this.historyWindow
    );

    if (this.recentCompletionHistory.length > 0) {
      const recentAccepted = this.recentCompletionHistory.filter(entry => entry.accepted).length;
      this.acceptanceRate = recentAccepted / this.recentCompletionHistory.length;
    } else {
      // No recent history: fall back to a neutral 0.5 so nobody is penalized at startup.
      this.acceptanceRate = 0.5;
    }
  }

  /**
   * @returns Lifetime counters plus the current acceptance rate and cooldown.
   */
  getStats(): AdaptiveRateLimiterStats {
    // Compute the cooldown first: doing so refreshes `acceptanceRate` from the
    // recent history, so both values in the snapshot describe the same state.
    const currentCooldown = this.getAdaptiveCooldown();

    return {
      acceptanceRate: this.acceptanceRate,
      totalCompletions: this.totalCompletions,
      acceptedCompletions: this.acceptedCompletions,
      rejectedCompletions: this.rejectedCompletions,
      currentCooldown
    };
  }
}
Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
/**
2+
* Completion Cache for Inline Completions
3+
*
4+
*/
5+
6+
import { monacoTypes } from '@remix-ui/editor';
7+
8+
/** A cached completion result together with the time it was stored. */
interface CacheEntry {
  result: any;
  /** `Date.now()` value captured when the result was cached. */
  timestamp: number;
}

/** Optional overrides for the cache's limits. */
interface CompletionCacheOptions {
  /** Lifetime of an entry in milliseconds. Default 30000. */
  cacheTimeout?: number;
  /** Maximum number of entries kept at once. Default 100. */
  maxCacheSize?: number;
}

/** Snapshot returned by {@link CompletionCache.getStats}. */
interface CompletionCacheStats {
  cacheSize: number;
  pendingRequests: number;
  /** Fraction of lookups served from the cache; 0 before any lookup happened. */
  hitRate: number;
}

/**
 * Time-limited, size-limited cache for inline completion results that also
 * de-duplicates identical requests while they are still in flight.
 */
export class CompletionCache {
  private cache: Map<string, CacheEntry> = new Map();
  private pendingRequests: Map<string, Promise<any>> = new Map();
  private hits = 0;
  private misses = 0;

  private readonly cacheTimeout: number = 30000; // 30 seconds
  private readonly maxCacheSize: number = 100;

  /**
   * @param options Per-field overrides; any field left undefined keeps its default.
   */
  constructor(options?: CompletionCacheOptions) {
    if (options) {
      this.cacheTimeout = options.cacheTimeout ?? this.cacheTimeout;
      this.maxCacheSize = options.maxCacheSize ?? this.maxCacheSize;
    }
  }

  /**
   * Build a cache key from the text around the cursor and the cursor position.
   *
   * @param word Text before the cursor; only its last 100 characters are used.
   * @param wordAfter Text after the cursor; only its first 100 characters are used.
   * @param position Cursor position (line number and column become part of the key).
   * @param task Optional prefix that keeps keys of different tasks apart.
   */
  createCacheKey(
    word: string,
    wordAfter: string,
    position: monacoTypes.Position,
    task?: string
  ): string {
    // Truncate the context so keys stay bounded in size.
    const contextHash = `${word.slice(-100)}_${wordAfter.slice(0, 100)}_${position.lineNumber}_${position.column}`;
    return task ? `${task}_${contextHash}` : contextHash;
  }

  /**
   * Get cached result if available and not expired.
   *
   * @returns The cached result, or `null` when there is no live entry. An
   *   expired entry is removed as a side effect. Every call counts towards
   *   the hit rate.
   */
  getCachedResult(cacheKey: string): any | null {
    const entry = this.lookupFresh(cacheKey);
    if (entry) {
      this.hits++;
      return entry.result;
    }

    this.misses++;
    return null;
  }

  /**
   * Cache a completion result, evicting expired entries first and then the
   * oldest entry if the cache is still full.
   */
  cacheResult(cacheKey: string, result: any): void {
    // Drop any previous entry for this key first. It frees the slot (so an
    // unrelated entry is not evicted needlessly) and, because Map iterates in
    // insertion order, makes the refreshed key the newest one.
    this.cache.delete(cacheKey);

    if (this.cache.size >= this.maxCacheSize) {
      this.cleanupExpiredEntries();
    }

    // If still at capacity, remove the oldest entry.
    if (this.cache.size >= this.maxCacheSize) {
      const oldestKey = this.cache.keys().next().value;
      // Compare against undefined: an empty string is a valid key.
      if (oldestKey !== undefined) {
        this.cache.delete(oldestKey);
      }
    }

    this.cache.set(cacheKey, {
      result: result,
      timestamp: Date.now()
    });
  }

  /** Check if a request is already pending. */
  isPending(cacheKey: string): boolean {
    return this.pendingRequests.has(cacheKey);
  }

  /** Get the pending request promise for a key, or `null` if there is none. */
  getPendingRequest(cacheKey: string): Promise<any> | null {
    return this.pendingRequests.get(cacheKey) || null;
  }

  /** Register a pending request. */
  setPendingRequest(cacheKey: string, promise: Promise<any>): void {
    this.pendingRequests.set(cacheKey, promise);
  }

  /** Remove a pending request. */
  removePendingRequest(cacheKey: string): void {
    this.pendingRequests.delete(cacheKey);
  }

  /**
   * Handle a request with caching and deduplication.
   *
   * @param cacheKey Key identifying the request, e.g. from `createCacheKey`.
   * @param requestFn Performs the actual request; called at most once per key
   *   while a request for that key is in flight.
   * @returns The cached result, the result of the in-flight request, or the
   *   result of a fresh `requestFn()` call. A rejection of `requestFn` is
   *   propagated and nothing is cached for it.
   */
  async handleRequest<T>(
    cacheKey: string,
    requestFn: () => Promise<T>
  ): Promise<T> {
    // Falsy cached results (such as an empty string) are not served from the
    // cache, so those requests are issued again.
    const entry = this.lookupFresh(cacheKey);
    if (entry && entry.result) {
      this.hits++;
      return entry.result;
    }
    this.misses++;

    // Join an identical request that is already in flight.
    const pendingRequest = this.getPendingRequest(cacheKey);
    if (pendingRequest) {
      return await pendingRequest;
    }

    const promise = requestFn();
    this.setPendingRequest(cacheKey, promise);

    try {
      const result = await promise;
      this.cacheResult(cacheKey, result);
      return result;
    } finally {
      // Only unregister our own promise: after clear() another request may
      // have been registered under the same key in the meantime.
      if (this.pendingRequests.get(cacheKey) === promise) {
        this.removePendingRequest(cacheKey);
      }
    }
  }

  /** Return the live entry for a key, deleting it instead if it has expired. */
  private lookupFresh(cacheKey: string): CacheEntry | undefined {
    const entry = this.cache.get(cacheKey);
    if (!entry) {
      return undefined;
    }
    if (Date.now() - entry.timestamp < this.cacheTimeout) {
      return entry;
    }

    this.cache.delete(cacheKey);
    return undefined;
  }

  /** Delete every entry whose age has reached `cacheTimeout`. */
  private cleanupExpiredEntries(): void {
    const now = Date.now();
    for (const [key, entry] of this.cache.entries()) {
      // Same boundary as the lookup path: an age equal to the timeout is expired.
      if (now - entry.timestamp >= this.cacheTimeout) {
        this.cache.delete(key);
      }
    }
  }

  /** Drop all cached results and all pending-request registrations. */
  clear(): void {
    this.cache.clear();
    this.pendingRequests.clear();
  }

  /**
   * @returns Current sizes plus the hit rate over all lookups made so far
   *   through `getCachedResult` and `handleRequest`.
   */
  getStats(): CompletionCacheStats {
    const lookups = this.hits + this.misses;
    return {
      cacheSize: this.cache.size,
      pendingRequests: this.pendingRequests.size,
      hitRate: lookups === 0 ? 0 : this.hits / lookups
    };
  }

  /** Remove expired entries on demand. */
  cleanup(): void {
    this.cleanupExpiredEntries();
  }
}

0 commit comments

Comments
 (0)