fix: curl (52) "Empty reply from server" errors caused by unsupported encodings

Matias Salinas · Matias Salinas · commit 0fb84dbaaea2 · 2025-07-17T19:08:08.000-04:00
diff --git a/Dockerfile b/Dockerfile
@@ -4,4 +4,4 @@ ARG BINARY
 
 COPY --chmod=755 ${BINARY} /cachebolt
 
-ENTRYPOINT ["/cachebolt"]
+ENTRYPOINT ["/cachebolt"]
diff --git a/config.yaml b/config.yaml
@@ -1,23 +1,56 @@
-app_id: gitlab-proxy
+# 🔧 Unique identifier for this CacheBolt instance
+app_id: my-service
+
+# 🌐 Port to bind the main proxy server (default: 3000)
 proxy_port: 3000
+
+# 🛠️ Port to bind the admin interface and /metrics (default: 3001)
 admin_port: 3001
+
+# 🚦 Maximum number of concurrent outbound requests to the downstream service
 max_concurrent_requests: 200
 
-downstream_base_url: https://gitlab.falabella.tech
-storage_backend: local
+# 🌐 Base URL of the upstream API/backend to which requests are proxied
+downstream_base_url: http://localhost:4000
+
+# 💾 Backend used for persistent cache storage
+# Available options: gcs, s3, azure, local
+storage_backend: s3
+
+# 🪣 Name of the Google Cloud Storage bucket (used if storage_backend is 'gcs')
+gcs_bucket: cachebolt
 
-gcs_bucket: ""
-s3_bucket: ""
-azure_container: ""
+# 🪣 Name of the Amazon S3 bucket (used if storage_backend is 's3')
+s3_bucket: my-cachebolt-bucket
 
+# 📦 Name of the Azure Blob Storage container (used if storage_backend is 'azure')
+azure_container: cachebolt-container
+
+# 🧠 Memory cache configuration
 cache:
+  # 🚨 System memory usage threshold (%) above which in-memory cache will start evicting entries
   memory_threshold: 90
+
+  # 🔁 Percentage of requests (per key) that should trigger a refresh from backend instead of using cache
+  # Example: 10% means 1 in every 10 requests will bypass cache
   refresh_percentage: 1
+
+  # 🗑️ Cache lifetime in seconds before the key is refreshed
   ttl_seconds: 10
 
+# ⚠️ Latency-based failover configuration
 latency_failover:
+  # ⌛ Default maximum allowed latency in milliseconds for any request
   default_max_latency_ms: 1000
 
+  # 🛣️ Path-specific latency thresholds
+  path_rules:
+    - pattern: "^/api/v1/products/.*"
+      max_latency_ms: 15000
+    - pattern: "^/auth/.*"
+      max_latency_ms: 10000
+
+# 🚫 List of request headers to ignore when computing cache keys (case-insensitive)
 ignored_headers:
   - postman-token
-  - if-none-match
+  - if-none-match
diff --git a/src/proxy.rs b/src/proxy.rs
@@ -86,6 +86,11 @@ pub async fn proxy_handler(req: Request<Body>) -> impl IntoResponse {
     let uri = req.uri().to_string();
     tracing::debug!("🔗 Received request for URI: {}", uri);
 
+    tracing::debug!("🔎 Incoming request headers:");
+    for (k, v) in req.headers().iter() {
+        tracing::debug!("    {}: {:?}", k, v);
+    }
+
     // Increment total request counter for each URI
     counter!("cachebolt_proxy_requests_total", "uri" => uri.clone()).increment(1);
 
@@ -173,9 +178,11 @@ pub async fn proxy_handler(req: Request<Body>) -> impl IntoResponse {
                     }
 
                     // Split response into parts
-                    let (parts, body) = resp.into_parts();
+                    let (mut parts, body) = resp.into_parts();
                     let body_bytes = hyper::body::to_bytes(body).await.unwrap_or_default();
 
+                    parts.headers.remove("content-length");
+
                     let headers_vec = parts
                         .headers
                         .iter()
@@ -214,7 +221,10 @@ pub async fn proxy_handler(req: Request<Body>) -> impl IntoResponse {
                             );
                         }
                     } else {
-                        tracing::info!("⏩ Cache bypass activated for '{}' due to client header", uri);
+                        tracing::info!(
+                            "⏩ Cache bypass activated for '{}' due to client header",
+                            uri
+                        );
                     }
 
                     Response::from_parts(parts, Body::from(body_bytes))
@@ -308,11 +318,22 @@ pub fn hash_uri(uri: &str) -> String {
 }
 
 /// Sends an outbound GET request to the downstream backend
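+///
+/// Forwards all headers from the incoming request except 'accept-encoding' and 'host'.
+/// The Host header is set to the host parsed from `downstream_base_url` when one can be extracted.
+/// Dropping 'accept-encoding' prevents `curl: (52) Empty reply from server` errors caused by
+/// unsupported encodings.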
+///
+/// # Arguments
+/// - `uri`: The path to append to the downstream base URL.
+/// - `original_req`: The incoming Axum request, from which headers are forwarded.
+///
+/// # Returns
+/// - `Ok(Response)` with the downstream response if successful.
+/// - `Err(())` if the downstream call fails or the request could not be built.
 pub async fn forward_request(uri: &str, original_req: Request<Body>) -> Result<Response<Body>, ()> {
+    // Get the config and build the downstream full URL
     let cfg = CONFIG.get().unwrap();
     let full_url = format!("{}{}", cfg.downstream_base_url, uri);
 
-    // Log scheme/host/path para debug (opcional, pero muy útil)
+    // Debug: Log the scheme, host, and path of the downstream URL
     if let Ok(parsed_url) = url::Url::parse(&full_url) {
         tracing::info!(
             "🌐 Downstream request: scheme='{}' host='{}' path='{}'",
@@ -322,14 +343,33 @@ pub async fn forward_request(uri: &str, original_req: Request<Body>) -> Result<R
         );
     }
 
+    // Parse downstream_base_url to extract the host (domain)
+    let downstream_host = url::Url::parse(&cfg.downstream_base_url)
+        .ok()
+        .and_then(|u| u.host_str().map(|s| s.to_string()))
+        .unwrap_or_else(|| "".to_string());
+
+    // Build the request, starting with the URL and GET method
     let mut builder = Request::builder().uri(full_url.clone()).method("GET");
 
-    // Copia todos los headers originales
+    // Copy all headers from the incoming request,
+    // except for 'accept-encoding' and 'host'
+    // (We want to control the Host header for SNI/proxying, and avoid content-encoding issues.)
     for (key, value) in original_req.headers().iter() {
+        if key.as_str().eq_ignore_ascii_case("accept-encoding")
+            || key.as_str().eq_ignore_ascii_case("host")
+        {
+            continue;
+        }
         builder = builder.header(key, value);
     }
 
-    // Construye el request
+    // Inject the Host header, if it was successfully extracted from the downstream_base_url
+    if !downstream_host.is_empty() {
+        builder = builder.header("Host", downstream_host);
+    }
+
+    // Build the final request object with empty body
     let req = match builder.body(Body::empty()) {
         Ok(req) => req,
         Err(e) => {
@@ -338,7 +378,7 @@ pub async fn forward_request(uri: &str, original_req: Request<Body>) -> Result<R
         }
     };
 
-    // Ejecuta la request, maneja errores con logs detallados
+    // Send the HTTP request to the downstream service
     match HTTP_CLIENT.request(req).await {
         Ok(resp) => Ok(resp),
         Err(e) => {

Original file line number	Diff line number	Diff line change
`@@ -4,4 +4,4 @@ ARG BINARY`
`4`	`4`
`5`	`5`	`COPY --chmod=755 ${BINARY} /cachebolt`
`6`	`6`
`7`		`-ENTRYPOINT ["/cachebolt"]`
	`7`	`+ENTRYPOINT ["/cachebolt"]`