github
diff --git a/‎containers/api-proxy/Dockerfile‎
Lines changed: 1 addition & 1 deletion b/‎containers/api-proxy/Dockerfile‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎containers/api-proxy/README.md‎
Lines changed: 3 additions & 0 deletions b/‎containers/api-proxy/README.md‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎containers/api-proxy/rate-limiter.js‎
Lines changed: 67 additions & 25 deletions b/‎containers/api-proxy/rate-limiter.js‎
Lines changed: 67 additions & 25 deletions
diff --git a/‎containers/api-proxy/rate-limiter.test.js‎
Lines changed: 35 additions & 4 deletions b/‎containers/api-proxy/rate-limiter.test.js‎
Lines changed: 35 additions & 4 deletions
diff --git a/‎containers/api-proxy/server.js‎
Lines changed: 49 additions & 8 deletions b/‎containers/api-proxy/server.js‎
Lines changed: 49 additions & 8 deletions
@@ -15,7 +15,7 @@ COPY package*.json ./
 RUN npm ci --omit=dev
 
 # Copy application files
-COPY server.js ./
+COPY server.js logging.js metrics.js rate-limiter.js ./
 
 # Create non-root user
 RUN addgroup -S apiproxy && adduser -S apiproxy -G apiproxy
 
@@ -35,6 +35,9 @@ Required (at least one):
 - `OPENAI_API_KEY` - OpenAI API key for authentication
 - `ANTHROPIC_API_KEY` - Anthropic API key for authentication
 
+Optional:
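+- `COPILOT_API_TARGET` - Target hostname for GitHub Copilot API requests. When unset, the proxy uses `api.enterprise.githubcopilot.com` if `GITHUB_SERVER_URL` points at a host other than `github.com`, and `api.githubcopilot.com` otherwise. Useful for GHES deployments.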
+
 Set by AWF:
 - `HTTP_PROXY` - Squid proxy URL (http://172.30.0.10:3128)
 - `HTTPS_PROXY` - Squid proxy URL (http://172.30.0.10:3128)
 
@@ -17,7 +17,7 @@
 'use strict';
 
 // ── Defaults ────────────────────────────────────────────────────────────
-const DEFAULT_RPM = 60;
+const DEFAULT_RPM = 600;
 const DEFAULT_RPH = 1000;
 const DEFAULT_BYTES_PM = 50 * 1024 * 1024; // 50 MB
 
@@ -58,10 +58,16 @@ function advanceWindow(win, now, size) {
 
   // Zero out slots that have expired
   const slotsToZero = Math.min(elapsed, size);
-  for (let i = 1; i <= slotsToZero; i++) {
-    const slot = (win.lastSlot + i) % size;
-    win.total -= win.counts[slot];
-    win.counts[slot] = 0;
+  if (slotsToZero >= size) {
+    // Full window expired — reset directly to avoid total drift
+    win.counts.fill(0);
+    win.total = 0;
+  } else {
+    for (let i = 1; i <= slotsToZero; i++) {
+      const slot = (win.lastSlot + i) % size;
+      win.total -= win.counts[slot];
+      win.counts[slot] = 0;
+    }
   }
 
   win.lastSlot = now % size;
@@ -83,25 +89,49 @@ function recordInWindow(win, now, size, value) {
 }
 
 /**
- * Get the sliding window estimate of the current rate.
- *
- * Uses the formula: current_window_count + previous_window_weight * previous_total
- * where previous_window_weight = (slot_duration - elapsed_in_current_slot) / slot_duration
+ * Get the current count in the sliding window.
  *
- * This is a simplified but effective approach: we use the total across
- * all current-window slots plus a weighted fraction of the oldest expired slot's
- * contribution to approximate the true sliding window.
+ * After advancing the window to zero out stale slots, returns the
+ * sum of all active slot counts.
  *
  * @param {object} win - Window object
  * @param {number} now - Current time in the slot's unit
  * @param {number} size - Window size
- * @returns {number} Estimated count in the window
+ * @returns {number} Count of events in the current window
  */
 function getWindowCount(win, now, size) {
   advanceWindow(win, now, size);
   return win.total;
 }
 
+/**
+ * Estimate how many time-units until the window count drops below a threshold.
+ *
+ * Walks the slots from oldest to newest, accumulating the count that is
+ * released as each slot expires, and stops at the first slot whose expiry
+ * brings the total below `limit`. A slot recorded `age` time-units ago is
+ * zeroed once the window has advanced `size - age` time-units, so the oldest
+ * slot (age `size - 1`) expires after 1 unit and the current slot after `size`.
+ *
+ * @param {object} win - Window object (must be advanced to `now` first)
+ * @param {number} now - Current time in the slot's unit
+ * @param {number} size - Window size
+ * @param {number} limit - The threshold to drop below
+ * @returns {number} Estimated time-units until count < limit (minimum 1)
+ */
+function estimateRetryAfter(win, now, size, limit) {
+  let freed = 0;
+  for (let age = size - 1; age >= 0; age--) {
+    const slot = ((now - age) % size + size) % size;
+    freed += win.counts[slot];
+    if (win.total - freed < limit) {
+      // Slot index is time % size, so this slot is reused (and zeroed) when
+      // the clock reaches (now - age) + size, i.e. in (size - age) units.
+      return Math.max(1, size - age);
+    }
+  }
+  // Shouldn't happen if total >= limit, but fall back to full window
+  return Math.max(1, size);
+}
+
 /**
  * Per-provider rate limit state.
  */
@@ -119,7 +149,7 @@ class ProviderState {
 class RateLimiter {
   /**
    * @param {object} config
-   * @param {number} [config.rpm=60] - Max requests per minute
+   * @param {number} [config.rpm=600] - Max requests per minute
    * @param {number} [config.rph=1000] - Max requests per hour
    * @param {number} [config.bytesPm=52428800] - Max bytes per minute (50 MB)
    * @param {boolean} [config.enabled=true] - Whether rate limiting is active
@@ -180,8 +210,8 @@ class RateLimiter {
       // Check RPM (requests per minute)
       const rpmCount = getWindowCount(state.rpmWindow, nowSec, MINUTE_SLOTS);
       if (rpmCount >= this.rpm) {
-        const resetAt = (nowSec + 1) + (MINUTE_SLOTS - 1);
-        const retryAfter = Math.max(1, MINUTE_SLOTS - (nowSec % MINUTE_SLOTS));
+        const retryAfter = estimateRetryAfter(state.rpmWindow, nowSec, MINUTE_SLOTS, this.rpm);
+        const resetAt = nowSec + retryAfter;
         return {
           allowed: false,
           limitType: 'rpm',
@@ -195,7 +225,8 @@ class RateLimiter {
       // Check RPH (requests per hour)
       const rphCount = getWindowCount(state.rphWindow, nowMin, HOUR_SLOTS);
       if (rphCount >= this.rph) {
-        const retryAfter = Math.max(1, (HOUR_SLOTS - (nowMin % HOUR_SLOTS)) * 60);
+        const retryAfterMin = estimateRetryAfter(state.rphWindow, nowMin, HOUR_SLOTS, this.rph);
+        const retryAfter = retryAfterMin * 60; // convert minutes to seconds
         const resetAt = Math.floor(nowMs / 1000) + retryAfter;
         return {
           allowed: false,
@@ -210,7 +241,7 @@ class RateLimiter {
       // Check bytes per minute
       const bytesCount = getWindowCount(state.bytesWindow, nowSec, MINUTE_SLOTS);
       if (bytesCount + requestBytes > this.bytesPm) {
-        const retryAfter = Math.max(1, MINUTE_SLOTS - (nowSec % MINUTE_SLOTS));
+        const retryAfter = estimateRetryAfter(state.bytesWindow, nowSec, MINUTE_SLOTS, this.bytesPm);
         const resetAt = nowSec + retryAfter;
         return {
           allowed: false,
@@ -271,17 +302,24 @@ class RateLimiter {
       const rpmCount = getWindowCount(state.rpmWindow, nowSec, MINUTE_SLOTS);
       const rphCount = getWindowCount(state.rphWindow, nowMin, HOUR_SLOTS);
 
+      const rpmRetry = rpmCount >= this.rpm
+        ? estimateRetryAfter(state.rpmWindow, nowSec, MINUTE_SLOTS, this.rpm)
+        : 0;
+      const rphRetry = rphCount >= this.rph
+        ? estimateRetryAfter(state.rphWindow, nowMin, HOUR_SLOTS, this.rph) * 60
+        : 0;
+
       return {
         enabled: true,
         rpm: {
           limit: this.rpm,
           remaining: Math.max(0, this.rpm - rpmCount),
-          reset: nowSec + (MINUTE_SLOTS - (nowSec % MINUTE_SLOTS)),
+          reset: rpmRetry > 0 ? nowSec + rpmRetry : 0,
         },
         rph: {
           limit: this.rph,
           remaining: Math.max(0, this.rph - rphCount),
-          reset: Math.floor(nowMs / 1000) + (HOUR_SLOTS - (nowMin % HOUR_SLOTS)) * 60,
+          reset: rphRetry > 0 ? Math.floor(nowMs / 1000) + rphRetry : 0,
         },
       };
     } catch (_err) {
@@ -309,15 +347,19 @@ class RateLimiter {
- * - AWF_RATE_LIMIT_RPM (default: 60)
+ * - AWF_RATE_LIMIT_RPM (default: 600)
  * - AWF_RATE_LIMIT_RPH (default: 1000)
  * - AWF_RATE_LIMIT_BYTES_PM (default: 52428800)
- * - AWF_RATE_LIMIT_ENABLED (default: "true")
+ * - AWF_RATE_LIMIT_ENABLED (default: "false" — rate limiting is opt-in)
  *
  * @returns {RateLimiter}
  */
 function create() {
-  const rpm = parseInt(process.env.AWF_RATE_LIMIT_RPM, 10) || DEFAULT_RPM;
-  const rph = parseInt(process.env.AWF_RATE_LIMIT_RPH, 10) || DEFAULT_RPH;
-  const bytesPm = parseInt(process.env.AWF_RATE_LIMIT_BYTES_PM, 10) || DEFAULT_BYTES_PM;
-  const enabled = process.env.AWF_RATE_LIMIT_ENABLED !== 'false';
+  const rawRpm = parseInt(process.env.AWF_RATE_LIMIT_RPM, 10);
+  const rawRph = parseInt(process.env.AWF_RATE_LIMIT_RPH, 10);
+  const rawBytesPm = parseInt(process.env.AWF_RATE_LIMIT_BYTES_PM, 10);
+
+  const rpm = (Number.isFinite(rawRpm) && rawRpm > 0) ? rawRpm : DEFAULT_RPM;
+  const rph = (Number.isFinite(rawRph) && rawRph > 0) ? rawRph : DEFAULT_RPH;
+  const bytesPm = (Number.isFinite(rawBytesPm) && rawBytesPm > 0) ? rawBytesPm : DEFAULT_BYTES_PM;
+  const enabled = process.env.AWF_RATE_LIMIT_ENABLED === 'true';
 
   return new RateLimiter({ rpm, rph, bytesPm, enabled });
 }
 
@@ -6,7 +6,7 @@ describe('rate-limiter', () => {
   describe('constructor', () => {
     it('should use defaults when no config provided', () => {
       const limiter = new RateLimiter();
-      expect(limiter.rpm).toBe(60);
+      expect(limiter.rpm).toBe(600);
       expect(limiter.rph).toBe(1000);
       expect(limiter.bytesPm).toBe(50 * 1024 * 1024);
       expect(limiter.enabled).toBe(true);
@@ -46,6 +46,7 @@ describe('rate-limiter', () => {
       process.env.AWF_RATE_LIMIT_RPM = '30';
       process.env.AWF_RATE_LIMIT_RPH = '500';
       process.env.AWF_RATE_LIMIT_BYTES_PM = '10485760';
+      process.env.AWF_RATE_LIMIT_ENABLED = 'true';
       const limiter = create();
       expect(limiter.rpm).toBe(30);
       expect(limiter.rph).toBe(500);
@@ -59,16 +60,46 @@ describe('rate-limiter', () => {
       expect(limiter.enabled).toBe(false);
     });
 
-    it('should use defaults when env vars are not set', () => {
+    it('should use defaults for negative env var values', () => {
+      process.env.AWF_RATE_LIMIT_RPM = '-5';
+      process.env.AWF_RATE_LIMIT_RPH = '-100';
+      process.env.AWF_RATE_LIMIT_BYTES_PM = '-1024';
+      const limiter = create();
+      expect(limiter.rpm).toBe(600);
+      expect(limiter.rph).toBe(1000);
+      expect(limiter.bytesPm).toBe(50 * 1024 * 1024);
+    });
+
+    it('should use defaults for zero env var values', () => {
+      process.env.AWF_RATE_LIMIT_RPM = '0';
+      process.env.AWF_RATE_LIMIT_RPH = '0';
+      process.env.AWF_RATE_LIMIT_BYTES_PM = '0';
+      const limiter = create();
+      expect(limiter.rpm).toBe(600);
+      expect(limiter.rph).toBe(1000);
+      expect(limiter.bytesPm).toBe(50 * 1024 * 1024);
+    });
+
+    it('should use defaults for non-numeric env var values', () => {
+      process.env.AWF_RATE_LIMIT_RPM = 'abc';
+      process.env.AWF_RATE_LIMIT_RPH = 'xyz';
+      process.env.AWF_RATE_LIMIT_BYTES_PM = '';
+      const limiter = create();
+      expect(limiter.rpm).toBe(600);
+      expect(limiter.rph).toBe(1000);
+      expect(limiter.bytesPm).toBe(50 * 1024 * 1024);
+    });
+
+    it('should default to disabled when env vars are not set', () => {
       delete process.env.AWF_RATE_LIMIT_RPM;
       delete process.env.AWF_RATE_LIMIT_RPH;
       delete process.env.AWF_RATE_LIMIT_BYTES_PM;
       delete process.env.AWF_RATE_LIMIT_ENABLED;
       const limiter = create();
-      expect(limiter.rpm).toBe(60);
+      expect(limiter.rpm).toBe(600);
       expect(limiter.rph).toBe(1000);
       expect(limiter.bytesPm).toBe(50 * 1024 * 1024);
-      expect(limiter.enabled).toBe(true);
+      expect(limiter.enabled).toBe(false);
     });
   });
 
 
@@ -46,12 +46,37 @@ const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
 const ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY;
 const COPILOT_GITHUB_TOKEN = process.env.COPILOT_GITHUB_TOKEN;
 
+// Configurable Copilot API target host (supports GHES, GHEC, and custom endpoints)
+// Priority: COPILOT_API_TARGET env var > auto-derive from GITHUB_SERVER_URL > default
+/**
+ * Resolve the hostname that Copilot requests are proxied to.
+ *
+ * @returns {string} The trimmed COPILOT_API_TARGET when set and non-blank;
+ *   otherwise 'api.enterprise.githubcopilot.com' when GITHUB_SERVER_URL parses
+ *   to a hostname other than 'github.com'; otherwise 'api.githubcopilot.com'.
+ */
+function deriveCopilotApiTarget() {
+  // Surrounding whitespace is never part of a hostname, and a blank value is
+  // treated as unset so it cannot end up as the proxy target.
+  const override = (process.env.COPILOT_API_TARGET || '').trim();
+  if (override) {
+    return override;
+  }
+  // For GitHub Enterprise Cloud (*.ghe.com) or GitHub Enterprise Server
+  // (any GITHUB_SERVER_URL that isn't https://github.com), route to the
+  // enterprise Copilot API endpoint instead of the individual one.
+  const serverUrl = process.env.GITHUB_SERVER_URL;
+  if (serverUrl) {
+    try {
+      const hostname = new URL(serverUrl).hostname;
+      if (hostname !== 'github.com') {
+        return 'api.enterprise.githubcopilot.com';
+      }
+    } catch {
+      // Invalid URL — fall through to default
+    }
+  }
+  return 'api.githubcopilot.com';
+}
+const COPILOT_API_TARGET = deriveCopilotApiTarget();
+
 // Squid proxy configuration (set via HTTP_PROXY/HTTPS_PROXY in docker-compose)
 const HTTPS_PROXY = process.env.HTTPS_PROXY || process.env.HTTP_PROXY;
 
 logRequest('info', 'startup', {
   message: 'Starting AWF API proxy sidecar',
   squid_proxy: HTTPS_PROXY || 'not configured',
+  copilot_api_target: COPILOT_API_TARGET,
   providers: {
     openai: !!OPENAI_API_KEY,
     anthropic: !!ANTHROPIC_API_KEY,
@@ -72,7 +97,11 @@ if (!proxyAgent) {
 function checkRateLimit(req, res, provider, requestBytes) {
   const check = limiter.check(provider, requestBytes);
   if (!check.allowed) {
-    const requestId = req.headers['x-request-id'] || generateRequestId();
+    const clientRequestId = req.headers['x-request-id'];
+    const requestId = (typeof clientRequestId === 'string' &&
+      clientRequestId.length <= 128 &&
+      /^[\w\-\.]+$/.test(clientRequestId))
+      ? clientRequestId : generateRequestId();
     const limitLabels = { rpm: 'requests per minute', rph: 'requests per hour', bytes_pm: 'bytes per minute' };
     const windowLabel = limitLabels[check.limitType] || check.limitType;
 
@@ -111,8 +140,14 @@ function checkRateLimit(req, res, provider, requestBytes) {
 /**
  * Forward a request to the target API, injecting auth headers and routing through Squid.
  */
+/** Validate that a request ID is safe (alphanumeric, dashes, dots, max 128 chars). */
+function isValidRequestId(id) {
+  return typeof id === 'string' && id.length <= 128 && /^[\w\-\.]+$/.test(id);
+}
+
 function proxyRequest(req, res, targetHost, injectHeaders, provider) {
-  const requestId = req.headers['x-request-id'] || generateRequestId();
+  const clientRequestId = req.headers['x-request-id'];
+  const requestId = isValidRequestId(clientRequestId) ? clientRequestId : generateRequestId();
   const startTime = Date.now();
 
   // Propagate request ID back to the client and forward to upstream
@@ -153,6 +188,8 @@ function proxyRequest(req, res, targetHost, injectHeaders, provider) {
 
   // Handle client-side errors (e.g. aborted connections)
   req.on('error', (err) => {
+    if (errored) return; // Prevent double handling
+    errored = true;
     const duration = Date.now() - startTime;
     metrics.gaugeDec('active_requests', { provider });
     metrics.increment('requests_errors_total', { provider });
@@ -175,9 +212,10 @@ function proxyRequest(req, res, targetHost, injectHeaders, provider) {
   const chunks = [];
   let totalBytes = 0;
   let rejected = false;
+  let errored = false;
 
   req.on('data', chunk => {
-    if (rejected) return;
+    if (rejected || errored) return;
     totalBytes += chunk.length;
     if (totalBytes > MAX_BODY_SIZE) {
       rejected = true;
@@ -204,7 +242,7 @@ function proxyRequest(req, res, targetHost, injectHeaders, provider) {
   });
 
   req.on('end', () => {
-    if (rejected) return;
+    if (rejected || errored) return;
     const body = Buffer.concat(chunks);
     const requestBytes = body.length;
 
@@ -356,7 +394,8 @@ const HEALTH_PORT = 10000;
 if (OPENAI_API_KEY) {
   const server = http.createServer((req, res) => {
     if (handleManagementEndpoint(req, res)) return;
-    if (checkRateLimit(req, res, 'openai', 0)) return;
+    const contentLength = parseInt(req.headers['content-length'], 10) || 0;
+    if (checkRateLimit(req, res, 'openai', contentLength)) return;
 
     proxyRequest(req, res, 'api.openai.com', {
       'Authorization': `Bearer ${OPENAI_API_KEY}`,
@@ -389,7 +428,8 @@ if (ANTHROPIC_API_KEY) {
       return;
     }
 
-    if (checkRateLimit(req, res, 'anthropic', 0)) return;
+    const contentLength = parseInt(req.headers['content-length'], 10) || 0;
+    if (checkRateLimit(req, res, 'anthropic', contentLength)) return;
 
     // Only set anthropic-version as default; preserve agent-provided version
     const anthropicHeaders = { 'x-api-key': ANTHROPIC_API_KEY };
@@ -415,9 +455,10 @@ if (COPILOT_GITHUB_TOKEN) {
       return;
     }
 
-    if (checkRateLimit(req, res, 'copilot', 0)) return;
+    const contentLength = parseInt(req.headers['content-length'], 10) || 0;
+    if (checkRateLimit(req, res, 'copilot', contentLength)) return;
 
-    proxyRequest(req, res, 'api.githubcopilot.com', {
+    proxyRequest(req, res, COPILOT_API_TARGET, {
       'Authorization': `Bearer ${COPILOT_GITHUB_TOKEN}`,
     }, 'copilot');
   });