perf: make the PyTorch thread count configurable

MrPandir · MrPandir · commit c778e8442147 · 2025-04-09T22:10:45.000+02:00
diff --git a/README.md b/README.md
@@ -105,6 +105,7 @@ curl -G 'http://localhost:8000/predict' --data-urlencode 'text=test text'
 - `TOXICITY_THRESHOLD` - the level below which the text will be considered toxic. Default: `0` - the argmax function is used. This is a float value, example: `-0.2`, `-0.05`, `1`.
 - `WEB_CONCURRENCY` - Number of worker processes. Defaults to the value of this environment variable if set, otherwise 1. Note: Not compatible with `--reload` option.
 - `METRICS_PREFIX` - Prefix for Prometheus metrics names. Default: `toxicity_detector`. Allows customization of metric names to avoid conflicts in a shared Prometheus setup.
+- `TORCH_THREADS` - Number of threads to use for PyTorch operations. Default: the number of CPU cores, or `1` if the core count cannot be determined.
 
 # Prometheus Metrics
 This project exposes several Prometheus metrics for monitoring the toxicity detector's performance and behavior. All metric names are prefixed with the value of the `METRICS_PREFIX` environment variable (default: `toxicity_detector`). Below is a list of available metrics and what they collect:
diff --git a/app/model.py b/app/model.py
@@ -11,16 +11,21 @@
 
 from .utils import clear_text, measure_time
 
-loop = asyncio.get_running_loop()
-loop.set_default_executor(ThreadPoolExecutor())
+cpu_cores = cpu_count()
 
 # Environment
 load_dotenv()
 
 model_path = environ.get("MODEL_PATH", "./model")
 threshold = float(environ.get("TOXICITY_THRESHOLD", 0))
 metrics_prefix = environ.get("METRICS_PREFIX", "toxicity_detector")
+num_threads = int(environ.get("TORCH_THREADS", cpu_cores or 1))
 
+# Limit the number of threads PyTorch uses for CPU operations
+torch.set_num_threads(num_threads)
+
+loop = asyncio.get_running_loop()
+loop.set_default_executor(ThreadPoolExecutor())
 
 # Initialize Prometheus metrics
 MODEL_ERRORS = Counter(
@@ -96,7 +101,6 @@ def wrapper(*args, **kwargs):
 # Log PyTorch backends and devices information
 logger.info("CUDA available: %s", torch.cuda.is_available())
 logger.info("Current device: %s", device)
-cpu_cores = cpu_count()
 logger.info(
     "Number of CPU cores: %s", cpu_cores if cpu_cores is not None else "Unknown"
 )