Skip to content

Commit b54f4ff

Browse files
committed
Merge remote-tracking branch 'origin/main' into test/ci-integration-suite
# Conflicts: # containers/api-proxy/rate-limiter.js # containers/api-proxy/rate-limiter.test.js # containers/api-proxy/server.js # src/cli.test.ts # src/cli.ts # src/types.ts # tests/integration/api-proxy-observability.test.ts # tests/integration/api-proxy-rate-limit.test.ts
2 parents 5f50de4 + 01950d5 commit b54f4ff

File tree

14 files changed

+471
-99
lines changed

14 files changed

+471
-99
lines changed

containers/api-proxy/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ COPY package*.json ./
1515
RUN npm ci --omit=dev
1616

1717
# Copy application files
18-
COPY server.js ./
18+
COPY server.js logging.js metrics.js rate-limiter.js ./
1919

2020
# Create non-root user
2121
RUN addgroup -S apiproxy && adduser -S apiproxy -G apiproxy

containers/api-proxy/README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@ Required (at least one):
3535
- `OPENAI_API_KEY` - OpenAI API key for authentication
3636
- `ANTHROPIC_API_KEY` - Anthropic API key for authentication
3737

38+
Optional:
39+
- `COPILOT_API_TARGET` - Target hostname for GitHub Copilot API requests (default: `api.githubcopilot.com`). Useful for GHES deployments.
40+
3841
Set by AWF:
3942
- `HTTP_PROXY` - Squid proxy URL (http://172.30.0.10:3128)
4043
- `HTTPS_PROXY` - Squid proxy URL (http://172.30.0.10:3128)

containers/api-proxy/rate-limiter.js

Lines changed: 67 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
'use strict';
1818

1919
// ── Defaults ────────────────────────────────────────────────────────────
20-
const DEFAULT_RPM = 60;
20+
const DEFAULT_RPM = 600;
2121
const DEFAULT_RPH = 1000;
2222
const DEFAULT_BYTES_PM = 50 * 1024 * 1024; // 50 MB
2323

@@ -58,10 +58,16 @@ function advanceWindow(win, now, size) {
5858

5959
// Zero out slots that have expired
6060
const slotsToZero = Math.min(elapsed, size);
61-
for (let i = 1; i <= slotsToZero; i++) {
62-
const slot = (win.lastSlot + i) % size;
63-
win.total -= win.counts[slot];
64-
win.counts[slot] = 0;
61+
if (slotsToZero >= size) {
62+
// Full window expired — reset directly to avoid total drift
63+
win.counts.fill(0);
64+
win.total = 0;
65+
} else {
66+
for (let i = 1; i <= slotsToZero; i++) {
67+
const slot = (win.lastSlot + i) % size;
68+
win.total -= win.counts[slot];
69+
win.counts[slot] = 0;
70+
}
6571
}
6672

6773
win.lastSlot = now % size;
@@ -83,25 +89,49 @@ function recordInWindow(win, now, size, value) {
8389
}
8490

8591
/**
86-
* Get the sliding window estimate of the current rate.
87-
*
88-
* Uses the formula: current_window_count + previous_window_weight * previous_total
89-
* where previous_window_weight = (slot_duration - elapsed_in_current_slot) / slot_duration
92+
* Get the current count in the sliding window.
9093
*
91-
* This is a simplified but effective approach: we use the total across
92-
* all current-window slots plus a weighted fraction of the oldest expired slot's
93-
* contribution to approximate the true sliding window.
94+
* After advancing the window to zero out stale slots, returns the
95+
* sum of all active slot counts.
9496
*
9597
* @param {object} win - Window object
9698
* @param {number} now - Current time in the slot's unit
9799
* @param {number} size - Window size
98-
* @returns {number} Estimated count in the window
100+
* @returns {number} Count of events in the current window
99101
*/
100102
function getWindowCount(win, now, size) {
101103
advanceWindow(win, now, size);
102104
return win.total;
103105
}
104106

107+
/**
108+
* Estimate how many time-units until the window count drops below a threshold.
109+
*
110+
* Scans backwards from the oldest slot in the window to find the first
111+
* non-zero slot. That slot will expire in (its age remaining) time-units.
112+
*
113+
* @param {object} win - Window object (must be advanced to `now` first)
114+
* @param {number} now - Current time in the slot's unit
115+
* @param {number} size - Window size
116+
* @param {number} limit - The threshold to drop below
117+
* @returns {number} Estimated time-units until count < limit (minimum 1)
118+
*/
119+
function estimateRetryAfter(win, now, size, limit) {
120+
// Walk from the oldest slot (now - size + 1) forward, accumulating
121+
// how much capacity is freed as each slot expires.
122+
let freed = 0;
123+
for (let age = size - 1; age >= 0; age--) {
124+
const slot = ((now - age) % size + size) % size;
125+
freed += win.counts[slot];
126+
if (win.total - freed < limit) {
127+
// This slot expires in (age + 1) time-units from now
128+
return Math.max(1, age + 1);
129+
}
130+
}
131+
// Shouldn't happen if total >= limit, but fall back to full window
132+
return Math.max(1, size);
133+
}
134+
105135
/**
106136
* Per-provider rate limit state.
107137
*/
@@ -119,7 +149,7 @@ class ProviderState {
119149
class RateLimiter {
120150
/**
121151
* @param {object} config
122-
* @param {number} [config.rpm=60] - Max requests per minute
152+
* @param {number} [config.rpm=600] - Max requests per minute
123153
* @param {number} [config.rph=1000] - Max requests per hour
124154
* @param {number} [config.bytesPm=52428800] - Max bytes per minute (50 MB)
125155
* @param {boolean} [config.enabled=true] - Whether rate limiting is active
@@ -180,8 +210,8 @@ class RateLimiter {
180210
// Check RPM (requests per minute)
181211
const rpmCount = getWindowCount(state.rpmWindow, nowSec, MINUTE_SLOTS);
182212
if (rpmCount >= this.rpm) {
183-
const resetAt = (nowSec + 1) + (MINUTE_SLOTS - 1);
184-
const retryAfter = Math.max(1, MINUTE_SLOTS - (nowSec % MINUTE_SLOTS));
213+
const retryAfter = estimateRetryAfter(state.rpmWindow, nowSec, MINUTE_SLOTS, this.rpm);
214+
const resetAt = nowSec + retryAfter;
185215
return {
186216
allowed: false,
187217
limitType: 'rpm',
@@ -195,7 +225,8 @@ class RateLimiter {
195225
// Check RPH (requests per hour)
196226
const rphCount = getWindowCount(state.rphWindow, nowMin, HOUR_SLOTS);
197227
if (rphCount >= this.rph) {
198-
const retryAfter = Math.max(1, (HOUR_SLOTS - (nowMin % HOUR_SLOTS)) * 60);
228+
const retryAfterMin = estimateRetryAfter(state.rphWindow, nowMin, HOUR_SLOTS, this.rph);
229+
const retryAfter = retryAfterMin * 60; // convert minutes to seconds
199230
const resetAt = Math.floor(nowMs / 1000) + retryAfter;
200231
return {
201232
allowed: false,
@@ -210,7 +241,7 @@ class RateLimiter {
210241
// Check bytes per minute
211242
const bytesCount = getWindowCount(state.bytesWindow, nowSec, MINUTE_SLOTS);
212243
if (bytesCount + requestBytes > this.bytesPm) {
213-
const retryAfter = Math.max(1, MINUTE_SLOTS - (nowSec % MINUTE_SLOTS));
244+
const retryAfter = estimateRetryAfter(state.bytesWindow, nowSec, MINUTE_SLOTS, this.bytesPm);
214245
const resetAt = nowSec + retryAfter;
215246
return {
216247
allowed: false,
@@ -271,17 +302,24 @@ class RateLimiter {
271302
const rpmCount = getWindowCount(state.rpmWindow, nowSec, MINUTE_SLOTS);
272303
const rphCount = getWindowCount(state.rphWindow, nowMin, HOUR_SLOTS);
273304

305+
const rpmRetry = rpmCount >= this.rpm
306+
? estimateRetryAfter(state.rpmWindow, nowSec, MINUTE_SLOTS, this.rpm)
307+
: 0;
308+
const rphRetry = rphCount >= this.rph
309+
? estimateRetryAfter(state.rphWindow, nowMin, HOUR_SLOTS, this.rph) * 60
310+
: 0;
311+
274312
return {
275313
enabled: true,
276314
rpm: {
277315
limit: this.rpm,
278316
remaining: Math.max(0, this.rpm - rpmCount),
279-
reset: nowSec + (MINUTE_SLOTS - (nowSec % MINUTE_SLOTS)),
317+
reset: rpmRetry > 0 ? nowSec + rpmRetry : 0,
280318
},
281319
rph: {
282320
limit: this.rph,
283321
remaining: Math.max(0, this.rph - rphCount),
284-
reset: Math.floor(nowMs / 1000) + (HOUR_SLOTS - (nowMin % HOUR_SLOTS)) * 60,
322+
reset: rphRetry > 0 ? Math.floor(nowMs / 1000) + rphRetry : 0,
285323
},
286324
};
287325
} catch (_err) {
@@ -309,15 +347,19 @@ class RateLimiter {
309347
* - AWF_RATE_LIMIT_RPM (default: 60)
310348
* - AWF_RATE_LIMIT_RPH (default: 1000)
311349
* - AWF_RATE_LIMIT_BYTES_PM (default: 52428800)
312-
* - AWF_RATE_LIMIT_ENABLED (default: "true")
350+
* - AWF_RATE_LIMIT_ENABLED (default: "false" — rate limiting is opt-in)
313351
*
314352
* @returns {RateLimiter}
315353
*/
316354
function create() {
317-
const rpm = parseInt(process.env.AWF_RATE_LIMIT_RPM, 10) || DEFAULT_RPM;
318-
const rph = parseInt(process.env.AWF_RATE_LIMIT_RPH, 10) || DEFAULT_RPH;
319-
const bytesPm = parseInt(process.env.AWF_RATE_LIMIT_BYTES_PM, 10) || DEFAULT_BYTES_PM;
320-
const enabled = process.env.AWF_RATE_LIMIT_ENABLED !== 'false';
355+
const rawRpm = parseInt(process.env.AWF_RATE_LIMIT_RPM, 10);
356+
const rawRph = parseInt(process.env.AWF_RATE_LIMIT_RPH, 10);
357+
const rawBytesPm = parseInt(process.env.AWF_RATE_LIMIT_BYTES_PM, 10);
358+
359+
const rpm = (Number.isFinite(rawRpm) && rawRpm > 0) ? rawRpm : DEFAULT_RPM;
360+
const rph = (Number.isFinite(rawRph) && rawRph > 0) ? rawRph : DEFAULT_RPH;
361+
const bytesPm = (Number.isFinite(rawBytesPm) && rawBytesPm > 0) ? rawBytesPm : DEFAULT_BYTES_PM;
362+
const enabled = process.env.AWF_RATE_LIMIT_ENABLED === 'true';
321363

322364
return new RateLimiter({ rpm, rph, bytesPm, enabled });
323365
}

containers/api-proxy/rate-limiter.test.js

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ describe('rate-limiter', () => {
66
describe('constructor', () => {
77
it('should use defaults when no config provided', () => {
88
const limiter = new RateLimiter();
9-
expect(limiter.rpm).toBe(60);
9+
expect(limiter.rpm).toBe(600);
1010
expect(limiter.rph).toBe(1000);
1111
expect(limiter.bytesPm).toBe(50 * 1024 * 1024);
1212
expect(limiter.enabled).toBe(true);
@@ -46,6 +46,7 @@ describe('rate-limiter', () => {
4646
process.env.AWF_RATE_LIMIT_RPM = '30';
4747
process.env.AWF_RATE_LIMIT_RPH = '500';
4848
process.env.AWF_RATE_LIMIT_BYTES_PM = '10485760';
49+
process.env.AWF_RATE_LIMIT_ENABLED = 'true';
4950
const limiter = create();
5051
expect(limiter.rpm).toBe(30);
5152
expect(limiter.rph).toBe(500);
@@ -59,16 +60,46 @@ describe('rate-limiter', () => {
5960
expect(limiter.enabled).toBe(false);
6061
});
6162

62-
it('should use defaults when env vars are not set', () => {
63+
it('should use defaults for negative env var values', () => {
64+
process.env.AWF_RATE_LIMIT_RPM = '-5';
65+
process.env.AWF_RATE_LIMIT_RPH = '-100';
66+
process.env.AWF_RATE_LIMIT_BYTES_PM = '-1024';
67+
const limiter = create();
68+
expect(limiter.rpm).toBe(600);
69+
expect(limiter.rph).toBe(1000);
70+
expect(limiter.bytesPm).toBe(50 * 1024 * 1024);
71+
});
72+
73+
it('should use defaults for zero env var values', () => {
74+
process.env.AWF_RATE_LIMIT_RPM = '0';
75+
process.env.AWF_RATE_LIMIT_RPH = '0';
76+
process.env.AWF_RATE_LIMIT_BYTES_PM = '0';
77+
const limiter = create();
78+
expect(limiter.rpm).toBe(600);
79+
expect(limiter.rph).toBe(1000);
80+
expect(limiter.bytesPm).toBe(50 * 1024 * 1024);
81+
});
82+
83+
it('should use defaults for non-numeric env var values', () => {
84+
process.env.AWF_RATE_LIMIT_RPM = 'abc';
85+
process.env.AWF_RATE_LIMIT_RPH = 'xyz';
86+
process.env.AWF_RATE_LIMIT_BYTES_PM = '';
87+
const limiter = create();
88+
expect(limiter.rpm).toBe(600);
89+
expect(limiter.rph).toBe(1000);
90+
expect(limiter.bytesPm).toBe(50 * 1024 * 1024);
91+
});
92+
93+
it('should default to disabled when env vars are not set', () => {
6394
delete process.env.AWF_RATE_LIMIT_RPM;
6495
delete process.env.AWF_RATE_LIMIT_RPH;
6596
delete process.env.AWF_RATE_LIMIT_BYTES_PM;
6697
delete process.env.AWF_RATE_LIMIT_ENABLED;
6798
const limiter = create();
68-
expect(limiter.rpm).toBe(60);
99+
expect(limiter.rpm).toBe(600);
69100
expect(limiter.rph).toBe(1000);
70101
expect(limiter.bytesPm).toBe(50 * 1024 * 1024);
71-
expect(limiter.enabled).toBe(true);
102+
expect(limiter.enabled).toBe(false);
72103
});
73104
});
74105

containers/api-proxy/server.js

Lines changed: 49 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -46,12 +46,37 @@ const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
4646
const ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY;
4747
const COPILOT_GITHUB_TOKEN = process.env.COPILOT_GITHUB_TOKEN;
4848

49+
// Configurable Copilot API target host (supports GHES/GHEC / custom endpoints)
50+
// Priority: COPILOT_API_TARGET env var > auto-derive from GITHUB_SERVER_URL > default
51+
function deriveCopilotApiTarget() {
52+
if (process.env.COPILOT_API_TARGET) {
53+
return process.env.COPILOT_API_TARGET;
54+
}
55+
// For GitHub Enterprise Cloud (*.ghe.com) or GitHub Enterprise Server
56+
// (any GITHUB_SERVER_URL that isn't https://github.com), route to the
57+
// enterprise Copilot API endpoint instead of the individual one.
58+
const serverUrl = process.env.GITHUB_SERVER_URL;
59+
if (serverUrl) {
60+
try {
61+
const hostname = new URL(serverUrl).hostname;
62+
if (hostname !== 'github.com') {
63+
return 'api.enterprise.githubcopilot.com';
64+
}
65+
} catch {
66+
// Invalid URL — fall through to default
67+
}
68+
}
69+
return 'api.githubcopilot.com';
70+
}
71+
const COPILOT_API_TARGET = deriveCopilotApiTarget();
72+
4973
// Squid proxy configuration (set via HTTP_PROXY/HTTPS_PROXY in docker-compose)
5074
const HTTPS_PROXY = process.env.HTTPS_PROXY || process.env.HTTP_PROXY;
5175

5276
logRequest('info', 'startup', {
5377
message: 'Starting AWF API proxy sidecar',
5478
squid_proxy: HTTPS_PROXY || 'not configured',
79+
copilot_api_target: COPILOT_API_TARGET,
5580
providers: {
5681
openai: !!OPENAI_API_KEY,
5782
anthropic: !!ANTHROPIC_API_KEY,
@@ -72,7 +97,11 @@ if (!proxyAgent) {
7297
function checkRateLimit(req, res, provider, requestBytes) {
7398
const check = limiter.check(provider, requestBytes);
7499
if (!check.allowed) {
75-
const requestId = req.headers['x-request-id'] || generateRequestId();
100+
const clientRequestId = req.headers['x-request-id'];
101+
const requestId = (typeof clientRequestId === 'string' &&
102+
clientRequestId.length <= 128 &&
103+
/^[\w\-\.]+$/.test(clientRequestId))
104+
? clientRequestId : generateRequestId();
76105
const limitLabels = { rpm: 'requests per minute', rph: 'requests per hour', bytes_pm: 'bytes per minute' };
77106
const windowLabel = limitLabels[check.limitType] || check.limitType;
78107

@@ -111,8 +140,14 @@ function checkRateLimit(req, res, provider, requestBytes) {
111140
/**
112141
* Forward a request to the target API, injecting auth headers and routing through Squid.
113142
*/
143+
/** Validate that a request ID is safe (alphanumeric, dashes, dots, max 128 chars). */
144+
function isValidRequestId(id) {
145+
return typeof id === 'string' && id.length <= 128 && /^[\w\-\.]+$/.test(id);
146+
}
147+
114148
function proxyRequest(req, res, targetHost, injectHeaders, provider) {
115-
const requestId = req.headers['x-request-id'] || generateRequestId();
149+
const clientRequestId = req.headers['x-request-id'];
150+
const requestId = isValidRequestId(clientRequestId) ? clientRequestId : generateRequestId();
116151
const startTime = Date.now();
117152

118153
// Propagate request ID back to the client and forward to upstream
@@ -153,6 +188,8 @@ function proxyRequest(req, res, targetHost, injectHeaders, provider) {
153188

154189
// Handle client-side errors (e.g. aborted connections)
155190
req.on('error', (err) => {
191+
if (errored) return; // Prevent double handling
192+
errored = true;
156193
const duration = Date.now() - startTime;
157194
metrics.gaugeDec('active_requests', { provider });
158195
metrics.increment('requests_errors_total', { provider });
@@ -175,9 +212,10 @@ function proxyRequest(req, res, targetHost, injectHeaders, provider) {
175212
const chunks = [];
176213
let totalBytes = 0;
177214
let rejected = false;
215+
let errored = false;
178216

179217
req.on('data', chunk => {
180-
if (rejected) return;
218+
if (rejected || errored) return;
181219
totalBytes += chunk.length;
182220
if (totalBytes > MAX_BODY_SIZE) {
183221
rejected = true;
@@ -204,7 +242,7 @@ function proxyRequest(req, res, targetHost, injectHeaders, provider) {
204242
});
205243

206244
req.on('end', () => {
207-
if (rejected) return;
245+
if (rejected || errored) return;
208246
const body = Buffer.concat(chunks);
209247
const requestBytes = body.length;
210248

@@ -356,7 +394,8 @@ const HEALTH_PORT = 10000;
356394
if (OPENAI_API_KEY) {
357395
const server = http.createServer((req, res) => {
358396
if (handleManagementEndpoint(req, res)) return;
359-
if (checkRateLimit(req, res, 'openai', 0)) return;
397+
const contentLength = parseInt(req.headers['content-length'], 10) || 0;
398+
if (checkRateLimit(req, res, 'openai', contentLength)) return;
360399

361400
proxyRequest(req, res, 'api.openai.com', {
362401
'Authorization': `Bearer ${OPENAI_API_KEY}`,
@@ -389,7 +428,8 @@ if (ANTHROPIC_API_KEY) {
389428
return;
390429
}
391430

392-
if (checkRateLimit(req, res, 'anthropic', 0)) return;
431+
const contentLength = parseInt(req.headers['content-length'], 10) || 0;
432+
if (checkRateLimit(req, res, 'anthropic', contentLength)) return;
393433

394434
// Only set anthropic-version as default; preserve agent-provided version
395435
const anthropicHeaders = { 'x-api-key': ANTHROPIC_API_KEY };
@@ -415,9 +455,10 @@ if (COPILOT_GITHUB_TOKEN) {
415455
return;
416456
}
417457

418-
if (checkRateLimit(req, res, 'copilot', 0)) return;
458+
const contentLength = parseInt(req.headers['content-length'], 10) || 0;
459+
if (checkRateLimit(req, res, 'copilot', contentLength)) return;
419460

420-
proxyRequest(req, res, 'api.githubcopilot.com', {
461+
proxyRequest(req, res, COPILOT_API_TARGET, {
421462
'Authorization': `Bearer ${COPILOT_GITHUB_TOKEN}`,
422463
}, 'copilot');
423464
});

0 commit comments

Comments
 (0)