privapps
diff --git a/‎.github/workflows/release.yml‎
Lines changed: 9 additions & 5 deletions b/‎.github/workflows/release.yml‎
Lines changed: 9 additions & 5 deletions
diff --git a/‎README.md‎
Lines changed: 92 additions & 4 deletions b/‎README.md‎
Lines changed: 92 additions & 4 deletions
diff --git a/‎auth.go‎
Lines changed: 10 additions & 13 deletions b/‎auth.go‎
Lines changed: 10 additions & 13 deletions
diff --git a/‎cli.go‎
Lines changed: 29 additions & 7 deletions b/‎cli.go‎
Lines changed: 29 additions & 7 deletions
diff --git a/‎config.example.json‎
Lines changed: 15 additions & 0 deletions b/‎config.example.json‎
Lines changed: 15 additions & 0 deletions
@@ -128,15 +128,19 @@ jobs:
           # Make the binary executable (important for Unix systems)
           chmod +x "$BINARY_NAME"
           
-          echo "Built binary: $BINARY_NAME"
-          ls -la "$BINARY_NAME"
+          # Gzip the binary
+          GZ_BINARY_NAME="$BINARY_NAME.gz"
+          gzip -c "$BINARY_NAME" > "$GZ_BINARY_NAME"
+          
+          echo "Built and gzipped binary: $GZ_BINARY_NAME"
+          ls -la "$GZ_BINARY_NAME"
 
       - name: Upload Release Asset
         uses: actions/upload-release-asset@v1
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         with:
           upload_url: ${{ needs.release.outputs.upload_url }}
-          asset_path: ./github-copilot-svcs-${{ matrix.goos }}-${{ matrix.goarch }}${{ matrix.suffix }}
-          asset_name: github-copilot-svcs-${{ matrix.goos }}-${{ matrix.goarch }}${{ matrix.suffix }}
-          asset_content_type: application/octet-stream
+          asset_path: ./github-copilot-svcs-${{ matrix.goos }}-${{ matrix.goarch }}${{ matrix.suffix }}.gz
+          asset_name: github-copilot-svcs-${{ matrix.goos }}-${{ matrix.goarch }}${{ matrix.suffix }}.gz
+          asset_content_type: application/gzip
@@ -1,4 +1,4 @@
-# GitHub Copilot SVCS Proxy
+# GitHub Copilot Service Proxy
 
 This project provides a reverse proxy for GitHub Copilot, exposing OpenAI-compatible endpoints for use with tools and clients that expect the OpenAI API. It follows the authentication and token management approach used by [OpenCode](https://github.com/sst/opencode).
 
@@ -21,6 +21,8 @@ This project provides a reverse proxy for GitHub Copilot, exposing OpenAI-compat
 - **Graceful Shutdown**: Proper signal handling and graceful server shutdown
 - **Comprehensive Logging**: Request/response logging for debugging and monitoring
 - **Enhanced CLI Commands**: Status monitoring, manual token refresh, and detailed configuration display
+- **Production-Ready Performance**: HTTP connection pooling, circuit breaker, request coalescing, and memory optimization
+- **Monitoring & Profiling**: Built-in pprof endpoints for memory, CPU, and goroutine analysis
 
 ## Downloads
 
@@ -38,6 +40,38 @@ Releases are automatically created when code is merged to the `main` branch:
 - Cross-platform binaries are built and attached to each release
 - Release notes include download links for all supported platforms
 
+## Performance & Production Features
+
+This service includes enterprise-grade performance optimizations:
+
+### 🚀 HTTP Server Optimizations
+- **Connection Pooling**: Shared HTTP client with configurable connection limits (100 max idle, 20 per host)
+- **Configurable Timeouts**: Fully customizable timeout settings via `config.json` for all server operations
+- **Streaming Support**: Read (30s), Write (300s), and Idle (120s) timeouts optimized for AI chat streaming
+- **Long Response Handling**: HTTP client and proxy context timeouts support up to 300s (5 minutes) for extended AI conversations
+- **Request Limits**: 5MB request body size limit to prevent memory exhaustion
+- **Advanced Transport**: Configurable dial timeout (10s), TLS handshake timeout (10s), keep-alive (30s)
+
+### 🔄 Reliability & Concurrency
+- **Circuit Breaker**: Automatic failure detection and recovery (5 failure threshold, 30s timeout)
+- **Context Propagation**: Request contexts with a configurable timeout (`proxy_context`, 300s by default) and proper cancellation
+- **Request Coalescing**: Deduplicates identical concurrent requests to the `/v1/models` endpoint
+- **Exponential Backoff**: Enhanced retry logic with circuit breaker integration
+- **Worker Pool**: Concurrent request processing with dedicated worker goroutines (CPU*2 workers)
+
+### 💾 Resource Management
+- **Buffer Pooling**: sync.Pool for request/response buffer reuse to reduce GC pressure
+- **Memory Optimization**: Streaming support with 32KB buffers for large responses
+- **Graceful Shutdown**: Proper resource cleanup and coordinated shutdown with worker pool termination
+- **Shared Clients**: Centralized HTTP client eliminates resource duplication
+- **Worker Pool Management**: Automatic worker lifecycle management with graceful termination
+
+### 📊 Monitoring & Observability
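+- **Profiling Endpoints**: `/debug/pprof/*` for memory, CPU, and goroutine analysis (served without authentication on the main proxy port, so restrict network access to it in production)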
+- **Enhanced Logging**: Circuit breaker state, request coalescing, worker pool metrics, and performance data
+- **Health Monitoring**: Detailed `/health` endpoint for load balancer integration
+- **Production Metrics**: Built-in support for operational monitoring and worker pool status
+
 ## Quickstart with Makefile
 
 If you have `make` installed, you can build, run, and test the project easily:
@@ -60,14 +94,21 @@ make build
 go build -o github-copilot-svcs
 ```
 
-### 2. First Time Setup & Authentication
+### 2. Optional: Configure Timeouts
+```bash
+# Copy example config and customize timeout values (run `mkdir -p ~/.local/share/github-copilot-svcs` first if the directory does not exist yet)
+cp config.example.json ~/.local/share/github-copilot-svcs/config.json
+# Edit the timeouts section as needed
+```
+
+### 3. First Time Setup & Authentication
 ```bash
 make auth
 # or manually:
 ./github-copilot-svcs auth
 ```
 
-### 3. Start the Proxy Server
+### 4. Start the Proxy Server
 ```bash
 make run
 # or manually:
@@ -139,6 +180,15 @@ GET http://localhost:8081/v1/models
 GET http://localhost:8081/health
 ```
 
+### Profiling Endpoints (Production Monitoring)
+```bash
+GET http://localhost:8081/debug/pprof/          # Overview of available profiles
+GET http://localhost:8081/debug/pprof/heap      # Memory heap profile
+GET http://localhost:8081/debug/pprof/goroutine # Goroutine profile
+GET http://localhost:8081/debug/pprof/profile   # CPU profile (30s sampling)
+GET http://localhost:8081/debug/pprof/trace     # Execution trace
+```
+
 ## Reliability & Error Handling
 
 ### Automatic Token Management
@@ -182,7 +232,19 @@ The configuration is stored in `~/.local/share/github-copilot-svcs/config.json`:
   "github_token": "gho_...",
   "copilot_token": "ghu_...",
   "expires_at": 1720000000,
-  "refresh_in": 1500
+  "refresh_in": 1500,
+  "timeouts": {
+    "http_client": 300,
+    "server_read": 30,
+    "server_write": 300,
+    "server_idle": 120,
+    "proxy_context": 300,
+    "circuit_breaker": 30,
+    "keep_alive": 30,
+    "tls_handshake": 10,
+    "dial_timeout": 10,
+    "idle_conn_timeout": 90
+  }
 }
 ```
 
@@ -194,6 +256,32 @@ The configuration is stored in `~/.local/share/github-copilot-svcs/config.json`:
 - `expires_at`: Unix timestamp when the Copilot token expires
 - `refresh_in`: Seconds until token should be refreshed (typically 1500 = 25 minutes)
 
+### Timeout Configuration
+
+All timeout values are specified in seconds and have sensible defaults:
+
+| Field | Default | Description |
+|-------|---------|-------------|
+| `http_client` | 300 | HTTP client timeout for outbound requests to GitHub Copilot API |
+| `server_read` | 30 | Server timeout for reading incoming requests |
+| `server_write` | 300 | Server timeout for writing responses (increased for streaming) |
+| `server_idle` | 120 | Server timeout for idle connections |
+| `proxy_context` | 300 | Request context timeout for proxy operations |
+| `circuit_breaker` | 30 | Circuit breaker recovery timeout when API is failing |
+| `keep_alive` | 30 | TCP keep-alive timeout for HTTP connections |
+| `tls_handshake` | 10 | TLS handshake timeout |
+| `dial_timeout` | 10 | Connection dial timeout |
+| `idle_conn_timeout` | 90 | Idle connection timeout in connection pool |
+
+**Streaming Support**: The service is tuned for long-running streaming chat completions: by default the HTTP client, server write, and proxy context timeouts are each 300 seconds (5 minutes), which leaves room for extended AI conversations.
+
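+**Custom Configuration**: You can use `config.example.json` as a starting point and modify timeout values based on your environment. On a fresh install, copy it as shown below; if you have already authenticated, add the `timeouts` section to your existing `config.json` by hand instead, because copying over the file would discard the stored tokens: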
+
+```bash
+cp config.example.json ~/.local/share/github-copilot-svcs/config.json
+# Edit the timeouts section as needed
+```
+
 ## Authentication Flow
 
 The authentication follows GitHub Copilot's OAuth device flow:
 
@@ -17,11 +17,11 @@ const (
 	copilotAPIKeyURL     = "https://api.github.com/copilot_internal/v2/token"
 	copilotClientID      = "Iv1.b507a08c87ecfe98"
 	copilotScope         = "read:user"
-	userAgent           = "GitHubCopilotChat/0.26.7"
-	
+	userAgent            = "GitHubCopilotChat/0.26.7"
+
 	// Retry configuration
 	maxRefreshRetries = 3
-	baseRetryDelay   = 2 // seconds
+	baseRetryDelay    = 2 // seconds
 )
 
 type deviceCodeResponse struct {
@@ -53,7 +53,7 @@ func authenticate(cfg *Config) error {
 		log.Printf("Token still valid: expires in %d seconds", cfg.ExpiresAt-now)
 		return nil // Already authenticated
 	}
-	
+
 	if cfg.CopilotToken != "" {
 		log.Printf("Token expired or expiring soon: expires in %d seconds, triggering re-auth", cfg.ExpiresAt-now)
 	} else {
@@ -72,8 +72,7 @@ func authenticate(cfg *Config) error {
 	body := fmt.Sprintf(`{"client_id":"%s","scope":"%s"}`, copilotClientID, copilotScope)
 	req.Body = io.NopCloser(strings.NewReader(body))
 
-	client := &http.Client{}
-	resp, err := client.Do(req)
+	resp, err := sharedHTTPClient.Do(req)
 	if err != nil {
 		return err
 	}
@@ -127,8 +126,7 @@ func pollForGitHubToken(deviceCode string, interval int) (string, error) {
 			copilotClientID, deviceCode)
 		req.Body = io.NopCloser(strings.NewReader(body))
 
-		client := &http.Client{}
-		resp, err := client.Do(req)
+		resp, err := sharedHTTPClient.Do(req)
 		if err != nil {
 			continue
 		}
@@ -160,8 +158,7 @@ func getCopilotToken(githubToken string) (string, int64, int64, error) {
 	req.Header.Set("Authorization", "token "+githubToken)
 	req.Header.Set("User-Agent", userAgent)
 
-	client := &http.Client{}
-	resp, err := client.Do(req)
+	resp, err := sharedHTTPClient.Do(req)
 	if err != nil {
 		return "", 0, 0, err
 	}
@@ -188,14 +185,14 @@ func refreshToken(cfg *Config) error {
 	// Retry with an increasing delay between attempts
 	for attempt := 1; attempt <= maxRefreshRetries; attempt++ {
 		log.Printf("Attempting to refresh Copilot token (attempt %d/%d)", attempt, maxRefreshRetries)
-		
+
 		copilotToken, expiresAt, refreshIn, err := getCopilotToken(cfg.GitHubToken)
 		if err != nil {
 			if attempt == maxRefreshRetries {
 				log.Printf("Token refresh failed after %d attempts: %v", maxRefreshRetries, err)
 				return err
 			}
-			
+
 			// Wait before retrying; the delay grows quadratically (baseRetryDelay * attempt^2 seconds)
 			waitTime := time.Duration(baseRetryDelay*attempt*attempt) * time.Second
 			log.Printf("Token refresh failed (attempt %d), retrying in %v: %v", attempt, waitTime, err)
@@ -210,6 +207,6 @@ func refreshToken(cfg *Config) error {
 
 		return saveConfig(cfg)
 	}
-	
+
 	return errors.New("maximum retry attempts exceeded")
 }
@@ -4,6 +4,7 @@ import (
 	"flag"
 	"fmt"
 	"net/http"
+	_ "net/http/pprof"
 	"os"
 	"time"
 )
@@ -27,6 +28,9 @@ func handleAuth() error {
 		return fmt.Errorf("failed to load config: %v", err)
 	}
 
+	// Initialize timeout configurations before any HTTP operations
+	initializeTimeouts(cfg)
+
 	fmt.Println("Starting GitHub Copilot authentication...")
 	if err := authenticate(cfg); err != nil {
 		return fmt.Errorf("authentication failed: %v", err)
@@ -51,13 +55,13 @@ func handleStatus() error {
 	now := getCurrentTime()
 	if cfg.CopilotToken != "" {
 		fmt.Printf("Authentication: ✓ Authenticated\n")
-		
+
 		timeUntilExpiry := cfg.ExpiresAt - now
 		if timeUntilExpiry > 0 {
 			minutes := timeUntilExpiry / 60
 			seconds := timeUntilExpiry % 60
 			fmt.Printf("Token expires: in %dm %ds (%d seconds)\n", minutes, seconds, timeUntilExpiry)
-			
+
 			// Show refresh timing
 			if cfg.RefreshIn > 0 {
 				refreshThreshold := cfg.RefreshIn / 5 // 20%
@@ -74,7 +78,7 @@ func handleStatus() error {
 			fmt.Printf("Token expires: ⚠️  EXPIRED (%d seconds ago)\n", -timeUntilExpiry)
 			fmt.Printf("Status: ❌ Token needs refresh\n")
 		}
-		
+
 		fmt.Printf("Has GitHub token: %t\n", cfg.GitHubToken != "")
 		if cfg.RefreshIn > 0 {
 			fmt.Printf("Refresh interval: %d seconds\n", cfg.RefreshIn)
@@ -115,6 +119,9 @@ func handleRun() error {
 		return fmt.Errorf("failed to load config: %v", err)
 	}
 
+	// Initialize timeout configurations before any HTTP operations
+	initializeTimeouts(cfg)
+
 	// Ensure we're authenticated
 	if err := ensureValidToken(cfg); err != nil {
 		return fmt.Errorf("authentication failed: %v", err)
@@ -126,15 +133,24 @@ func handleRun() error {
 	mux.HandleFunc("/v1/models", modelsHandler(cfg))
 	mux.HandleFunc("/v1/chat/completions", proxyHandler(cfg))
 	mux.HandleFunc("/health", healthHandler)
+	// Add pprof endpoints for profiling
+	mux.HandleFunc("/debug/pprof/", http.DefaultServeMux.ServeHTTP)
+	mux.HandleFunc("/debug/pprof/cmdline", http.DefaultServeMux.ServeHTTP)
+	mux.HandleFunc("/debug/pprof/profile", http.DefaultServeMux.ServeHTTP)
+	mux.HandleFunc("/debug/pprof/symbol", http.DefaultServeMux.ServeHTTP)
+	mux.HandleFunc("/debug/pprof/trace", http.DefaultServeMux.ServeHTTP)
 
 	port := cfg.Port
 	if port == 0 {
 		port = 8081
 	}
 
 	server := &http.Server{
-		Addr:    fmt.Sprintf(":%d", port),
-		Handler: mux,
+		Addr:         fmt.Sprintf(":%d", port),
+		Handler:      mux,
+		ReadTimeout:  time.Duration(cfg.Timeouts.ServerRead) * time.Second,
+		WriteTimeout: time.Duration(cfg.Timeouts.ServerWrite) * time.Second,
+		IdleTimeout:  time.Duration(cfg.Timeouts.ServerIdle) * time.Second,
 	}
 
 	setupGracefulShutdown(server)
@@ -158,6 +174,9 @@ func handleModels() error {
 		return fmt.Errorf("failed to load config: %v", err)
 	}
 
+	// Initialize timeout configurations before any HTTP operations
+	initializeTimeouts(cfg)
+
 	// Ensure we're authenticated
 	if err := ensureValidToken(cfg); err != nil {
 		return fmt.Errorf("authentication failed: %v", err)
@@ -189,6 +208,9 @@ func handleRefresh() error {
 		return fmt.Errorf("failed to load config: %v", err)
 	}
 
+	// Initialize timeout configurations before any HTTP operations
+	initializeTimeouts(cfg)
+
 	if cfg.CopilotToken == "" {
 		return fmt.Errorf("no token to refresh - run 'auth' command first")
 	}
@@ -199,13 +221,13 @@ func handleRefresh() error {
 	}
 
 	fmt.Printf("✅ Token refresh successful!\n")
-	
+
 	// Show new expiration time
 	now := getCurrentTime()
 	timeUntilExpiry := cfg.ExpiresAt - now
 	minutes := timeUntilExpiry / 60
 	seconds := timeUntilExpiry % 60
 	fmt.Printf("New token expires in: %dm %ds\n", minutes, seconds)
-	
+
 	return nil
 }
@@ -0,0 +1,15 @@
+{
+  "port": 8081,
+  "timeouts": {
+    "http_client": 300,
+    "server_read": 30,
+    "server_write": 300,
+    "server_idle": 120,
+    "proxy_context": 300,
+    "circuit_breaker": 30,
+    "keep_alive": 30,
+    "tls_handshake": 10,
+    "dial_timeout": 10,
+    "idle_conn_timeout": 90
+  }
+}