Skip to content

Commit 935e4f4

Browse files
committed
feat(relieve): discard request if the caller is not waiting for the answer anymore
When behind a proxy, this requires the proxy to close the connection to be effective, though. Signed-off-by: Raphael Glon <[email protected]>
1 parent c0a0e42 commit 935e4f4

File tree

3 files changed

+67
-1
lines changed

3 files changed

+67
-1
lines changed

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,4 @@ torch==2.5.1
2020
torchvision
2121
torchaudio
2222
peft==0.15.1
23+
psutil>=6.0.0

src/huggingface_inference_toolkit/utils.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
import importlib.util
2+
import ipaddress
23
import sys
34
from pathlib import Path
45

6+
import psutil
7+
from starlette.requests import Request
8+
59
from huggingface_inference_toolkit.const import HF_DEFAULT_PIPELINE_NAME, HF_MODULE_NAME
610
from huggingface_inference_toolkit.logging import logger
711

@@ -99,3 +103,59 @@ def convert_params_to_int_or_bool(params):
99103
if v == "true":
100104
params[k] = True
101105
return params
106+
107+
108+
def already_left(request: Request) -> bool:
    """
    Check if the caller has already left without waiting for the answer to come. This can help during burst to relieve
    the pressure on the worker by cancelling jobs whose results don't matter as they won't be fetched anyway.

    :param request: the incoming Starlette request whose client (host, port) pair is matched
        against the host's established TCP connections.
    :return: True only when we are confident the caller's TCP connection is gone; False whenever
        we are unsure (missing client info, invalid port, any unexpected error, or a matching
        ESTABLISHED connection is found).
    """
    # NOTE: Starlette method request.is_disconnected is totally broken, consumes the payload, does not return
    # the correct status. So we use the good old way to identify if the caller is still there.
    # In any case, if we are not sure, we return False
    logger.info("Checking if request caller already left")
    try:
        client = request.client
        # request.client can be None (e.g. unix domain socket, test client). Without a peer
        # address there is nothing to match against, so assume the caller is still there
        # instead of letting an AttributeError fall through to the broad handler below.
        if client is None or not client.host:
            return False

        port = int(client.port)
        host = ipaddress.ip_address(client.host)

        if port <= 0 or port > 65535:
            logger.warning("Unexpected source port format for caller %s", port)
            return False
        counter = 0
        for connection in psutil.net_connections(kind="tcp"):
            counter += 1
            # Only an ESTABLISHED connection whose remote endpoint matches the caller
            # proves the caller is still connected; skip everything else.
            if connection.status != psutil.CONN_ESTABLISHED:
                continue
            if not connection.raddr:
                continue
            if int(connection.raddr.port) != port:
                continue
            if (
                not connection.raddr.ip
                or ipaddress.ip_address(connection.raddr.ip) != host
            ):
                continue
            logger.info(
                "Found caller connection still established, caller is most likely still there, %s",
                connection,
            )
            return False
    except Exception as e:
        # Best effort only: this check must never break the request path, so on any
        # unexpected failure we assume the caller is still waiting.
        logger.warning(
            "Unexpected error while checking if caller already left, assuming still there"
        )
        logger.exception(e)
        return False

    logger.info(
        "%d connections checked. No connection found matching to the caller, probably left",
        counter,
    )
    return True

src/huggingface_inference_toolkit/webservice_starlette.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
from huggingface_inference_toolkit.logging import logger
2828
from huggingface_inference_toolkit.serialization.base import ContentType
2929
from huggingface_inference_toolkit.serialization.json_utils import Jsoner
30-
from huggingface_inference_toolkit.utils import convert_params_to_int_or_bool
30+
from huggingface_inference_toolkit.utils import convert_params_to_int_or_bool, already_left
3131
from huggingface_inference_toolkit.vertex_ai_utils import _load_repository_from_gcs
3232

3333
INFERENCE_HANDLERS = {}
@@ -101,6 +101,11 @@ async def metrics(request):
101101

102102
async def predict(request):
103103
global INFERENCE_HANDLERS
104+
105+
if os.getenv("DISCARD_LEFT", "0").lower() in ["1", "true", "yes"] and already_left(request):
106+
logger.info("Discarding request as the caller already left")
107+
return Response(status_code=204)
108+
104109
if not MODEL_DOWNLOADED:
105110
with MODEL_DL_LOCK:
106111
_eager_model_dl()

0 commit comments

Comments
 (0)