Skip to content

Commit 235e5d7

Browse files
authored
feat: add GPU support (#13)
* chore(docker): make all envs customizable * chore(otel): fix jaeger exporter config * chore(make): prefer docker compose as plugin * chore: remove deprecated doc and log * chore(docker): add GPU driver support * chore: bump to v0.5.0
1 parent 10a6f30 commit 235e5d7

File tree

7 files changed

+40
-23
lines changed

7 files changed

+40
-23
lines changed

Dockerfile

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@ COPY babeltron ./babeltron
1414
RUN poetry config virtualenvs.create false \
1515
&& poetry install --without dev --no-interaction --no-ansi
1616

17+
# Install CUDA-enabled PyTorch (replacing the CPU-only version)
18+
RUN pip uninstall -y torch torchvision torchaudio && \
19+
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
20+
1721
FROM python:3.10-slim
1822

1923
WORKDIR /app
@@ -29,6 +33,13 @@ COPY --from=builder /app/babeltron ./babeltron
2933
COPY docker-entrypoint.sh /app/docker-entrypoint.sh
3034
RUN chmod +x /app/docker-entrypoint.sh
3135

36+
# We don't need to install CUDA libraries in the container
37+
# The NVIDIA container runtime will provide GPU access
38+
# Just ensure we have basic dependencies
39+
RUN apt-get update && apt-get install -y --no-install-recommends \
40+
libgomp1 \
41+
&& rm -rf /var/lib/apt/lists/*
42+
3243
ENV PYTHONPATH=/app
3344
ENV MODEL_PATH=/models
3445
ENV PORT=8000

Makefile

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
.PHONY: check-poetry install test lint format help system-deps coverage coverage-html download-model download-model-m2m-small download-model-m2m-medium download-model-m2m-large download-model-nllb download-model-nllb-small download-model-nllb-medium download-model-nllb-large serve serve-prod docker-build docker-run docker-compose-up docker-compose-down pre-commit-install pre-commit-run docker-build-with-model docker-up docker-down
1+
.PHONY: check-poetry install test lint format help system-deps coverage coverage-html download-model download-model-m2m-small download-model-m2m-medium download-model-m2m-large download-model-nllb download-model-nllb-small download-model-nllb-medium download-model-nllb-large serve serve-prod docker-build docker-run docker-compose-up docker-compose-down pre-commit-install pre-commit-run docker-build-with-model docker-up docker-down
22

33
# Define model path variable with default value, can be overridden by environment
44
MODEL_PATH ?= ./models
@@ -181,7 +181,7 @@ docker-run: ## Run Docker container with model volume mount
181181
@echo "Running Docker container..."
182182
@docker run -p $(PORT):$(PORT) -v $(shell pwd)/$(MODEL_PATH):/models -e MODEL_PATH=/models -e BABELTRON_BABELTRON_MODEL_TYPE=$(BABELTRON_MODEL_TYPE) -e PORT=$(PORT) $(IMAGE_NAME):latest
183183

184-
docker-up: ## Build and start services with docker-compose
184+
docker-up: ## Build and start services with docker compose
185185
@echo "Checking for model files..."
186186
@if [ ! -d "$(MODEL_PATH)" ] || [ -z "$(shell ls -A $(MODEL_PATH) 2>/dev/null)" ]; then \
187187
echo "No model files found in $(MODEL_PATH) directory."; \
@@ -217,18 +217,18 @@ docker-up: ## Build and start services with docker-compose
217217
echo "Model download skipped. Container may not work properly."; \
218218
fi; \
219219
fi
220-
@echo "Building and starting services with docker-compose..."
221-
@BABELTRON_MODEL_TYPE=$(BABELTRON_MODEL_TYPE) docker-compose up -d --build
220+
@echo "Building and starting services with docker compose..."
221+
@BABELTRON_MODEL_TYPE=$(BABELTRON_MODEL_TYPE) docker compose up -d --build
222222
@echo "Services started successfully. API available at http://localhost:8000"
223223
@echo "API documentation available at http://localhost:8000/docs"
224224

225225
docker-down:
226-
@echo "Stopping docker-compose services..."
227-
@docker-compose down
226+
@echo "Stopping docker compose services..."
227+
@docker compose down
228228

229-
docker-compose-down: ## Stop Docker Compose services
229+
docker-compose-down: ## Stop Docker Compose services
230230
@echo "Stopping Docker Compose services..."
231-
@PORT=$(PORT) docker-compose down
231+
@PORT=$(PORT) docker compose down
232232
@echo "Services stopped successfully."
233233

234234
pre-commit-install:
@@ -238,7 +238,7 @@ pre-commit-install:
238238
pre-commit-run:
239239
pre-commit run --all-files
240240

241-
docker-compose-up: ## Start services with Docker Compose
241+
docker-compose-up: ## Start services with Docker Compose
242242
@echo "Starting services with Docker Compose..."
243-
@PORT=$(PORT) docker-compose up -d
243+
@PORT=$(PORT) docker compose up -d
244244
@echo "Services started successfully. API is available at http://localhost:$(PORT)/api/docs"

README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,6 @@ make docker-compose-down
275275
The following environment variables can be used to configure the application:
276276

277277
- `MODEL_PATH`: Path to the model directory (default: `./models`)
278-
- `MODEL_TYPE`: Type of model to use (`m2m` or `nllb`, default: `m2m`)
279278
- `MODEL_SIZE`: Size of model to use (`small`, `medium`, or `large`, default: `small`)
280279
- `PORT`: Port to run the API server on (default: `8000`)
281280
- `WORKER_COUNT`: Number of worker processes to use (default: `1`)

babeltron/app/models/m2m100.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,10 @@ def get_model_path() -> str:
3333

3434
# First, look for M2M100 model directories
3535
for base_path in possible_paths:
36-
print(f"Checking base path: {base_path}")
3736
if not base_path.exists():
3837
continue
3938

4039
m2m_dirs = list(base_path.glob("m2m*"))
41-
print(f"Checking m2m_dirs path: {m2m_dirs}")
4240
if m2m_dirs:
4341
# Use the first matching directory
4442
logging.info(f"Found M2M100 model directory: {m2m_dirs[0]}")

docker-compose.yml

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,17 +10,26 @@ services:
1010
volumes:
1111
- ./models:/models
1212
environment:
13-
- MODEL_PATH=/models
13+
- MODEL_PATH=${MODEL_PATH:-/models}
1414
- BABELTRON_MODEL_TYPE=${BABELTRON_MODEL_TYPE:-m2m100}
15-
- OTLP_MODE=otlp-grpc
16-
- OTEL_SERVICE_NAME=babeltron
17-
- OTLP_COLLECTOR_HOST=otel-collector
18-
- OTLP_COLLECTOR_PORT=4317
15+
- OTLP_MODE=${OTLP_MODE:-otlp-grpc}
16+
- OTEL_SERVICE_NAME=${OTEL_SERVICE_NAME:-babeltron}
17+
- OTLP_COLLECTOR_HOST=${OTLP_COLLECTOR_HOST:-otel-collector}
18+
- OTLP_COLLECTOR_PORT=${OTLP_COLLECTOR_PORT:-4317}
1919
- AUTH_USERNAME=${AUTH_USERNAME}
2020
- AUTH_PASSWORD=${AUTH_PASSWORD}
21-
- WORKER_COUNT=2
21+
- WORKER_COUNT=${WORKER_COUNT:-2}
2222
- PORT=${PORT:-8000}
23+
- NVIDIA_VISIBLE_DEVICES=${NVIDIA_VISIBLE_DEVICES:-all}
24+
- CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-0}
2325
restart: unless-stopped
26+
deploy:
27+
resources:
28+
reservations:
29+
devices:
30+
- driver: nvidia
31+
count: 1
32+
capabilities: [gpu]
2433
healthcheck:
2534
test: ["CMD", "curl", "-f", "http://localhost:${PORT:-8000}/healthz"]
2635
interval: 30s

etc/otel-collector-config.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@ processors:
1717
spike_limit_percentage: 25
1818

1919
exporters:
20-
otlp:
21-
endpoint: jaeger:4317
20+
jaeger:
21+
endpoint: jaeger:14250
2222
tls:
2323
insecure: true
2424
debug:
@@ -38,4 +38,4 @@ service:
3838
traces:
3939
receivers: [otlp]
4040
processors: [memory_limiter, batch]
41-
exporters: [otlp, debug]
41+
exporters: [jaeger, debug]

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "babeltron"
3-
version = "0.4.0"
3+
version = "0.5.0"
44
dynamic = ["version"]
55
description = "A Python-based REST API that leverages single multilingual models like mBERT to provide efficient text translation services"
66
authors = [

0 commit comments

Comments
 (0)