Skip to content

Commit 235e5d7

Browse files
authored
feat: add GPU support (#13)
* chore(docker): make all envs customizable * chore(otel): fix jaeger exporter config * chore(make): prefer docker compose as plugin * chore: remove deprecated doc and log * chore(docker): add GPU driver support * chore: bump to v0.5.0
1 parent 10a6f30 commit 235e5d7

File tree

7 files changed

+40
-23
lines changed

7 files changed

+40
-23
lines changed

Dockerfile

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@ COPY babeltron ./babeltron
1414
RUN poetry config virtualenvs.create false \
1515
&& poetry install --without dev --no-interaction --no-ansi
1616

17+
# Install CUDA-enabled PyTorch (replacing the CPU-only version)
18+
RUN pip uninstall -y torch torchvision torchaudio && \
19+
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
20+
1721
FROM python:3.10-slim
1822

1923
WORKDIR /app
@@ -29,6 +33,13 @@ COPY --from=builder /app/babeltron ./babeltron
2933
COPY docker-entrypoint.sh /app/docker-entrypoint.sh
3034
RUN chmod +x /app/docker-entrypoint.sh
3135

36+
# We don't need to install CUDA libraries in the container
37+
# The NVIDIA container runtime will provide GPU access
38+
# Just ensure we have basic dependencies
39+
RUN apt-get update && apt-get install -y --no-install-recommends \
40+
libgomp1 \
41+
&& rm -rf /var/lib/apt/lists/*
42+
3243
ENV PYTHONPATH=/app
3344
ENV MODEL_PATH=/models
3445
ENV PORT=8000

Makefile

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
.PHONY: check-poetry install test lint format help system-deps coverage coverage-html download-model download-model-m2m-small download-model-m2m-medium download-model-m2m-large download-model-nllb download-model-nllb-small download-model-nllb-medium download-model-nllb-large serve serve-prod docker-build docker-run docker-compose-up docker-compose-down pre-commit-install pre-commit-run docker-build-with-model docker-up docker-down
1+
.PHONY: check-poetry install test lint format help system-deps coverage coverage-html download-model download-model-m2m-small download-model-m2m-medium download-model-m2m-large download-model-nllb download-model-nllb-small download-model-nllb-medium download-model-nllb-large serve serve-prod docker-build docker-run docker-compose-up docker-compose-down pre-commit-install pre-commit-run docker-build-with-model docker-up docker-down
22

33
# Define model path variable with default value, can be overridden by environment
44
MODEL_PATH ?= ./models
@@ -181,7 +181,7 @@ docker-run: ## Run Docker container with model volume mount
181181
@echo "Running Docker container..."
182182
@docker run -p $(PORT):$(PORT) -v $(shell pwd)/$(MODEL_PATH):/models -e MODEL_PATH=/models -e BABELTRON_BABELTRON_MODEL_TYPE=$(BABELTRON_MODEL_TYPE) -e PORT=$(PORT) $(IMAGE_NAME):latest
183183

184-
docker-up: ## Build and start services with docker-compose
184+
docker-up: ## Build and start services with docker compose
185185
@echo "Checking for model files..."
186186
@if [ ! -d "$(MODEL_PATH)" ] || [ -z "$(shell ls -A $(MODEL_PATH) 2>/dev/null)" ]; then \
187187
echo "No model files found in $(MODEL_PATH) directory."; \
@@ -217,18 +217,18 @@ docker-up: ## Build and start services with docker-compose
217217
echo "Model download skipped. Container may not work properly."; \
218218
fi; \
219219
fi
220-
@echo "Building and starting services with docker-compose..."
221-
@BABELTRON_MODEL_TYPE=$(BABELTRON_MODEL_TYPE) docker-compose up -d --build
220+
@echo "Building and starting services with docker compose..."
221+
@BABELTRON_MODEL_TYPE=$(BABELTRON_MODEL_TYPE) docker compose up -d --build
222222
@echo "Services started successfully. API available at http://localhost:8000"
223223
@echo "API documentation available at http://localhost:8000/docs"
224224

225225
docker-down:
226-
@echo "Stopping docker-compose services..."
227-
@docker-compose down
226+
@echo "Stopping docker compose services..."
227+
@docker compose down
228228

229-
docker-compose-down: ## Stop Docker Compose services
229+
docker-compose-down: ## Stop Docker Compose services
230230
@echo "Stopping Docker Compose services..."
231-
@PORT=$(PORT) docker-compose down
231+
@PORT=$(PORT) docker compose down
232232
@echo "Services stopped successfully."
233233

234234
pre-commit-install:
@@ -238,7 +238,7 @@ pre-commit-install:
238238
pre-commit-run:
239239
pre-commit run --all-files
240240

241-
docker-compose-up: ## Start services with Docker Compose
241+
docker-compose-up: ## Start services with Docker Compose
242242
@echo "Starting services with Docker Compose..."
243-
@PORT=$(PORT) docker-compose up -d
243+
@PORT=$(PORT) docker compose up -d
244244
@echo "Services started successfully. API is available at http://localhost:$(PORT)/api/docs"

README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,6 @@ make docker-compose-down
275275
The following environment variables can be used to configure the application:
276276

277277
- `MODEL_PATH`: Path to the model directory (default: `./models`)
278-
- `MODEL_TYPE`: Type of model to use (`m2m` or `nllb`, default: `m2m`)
279278
- `MODEL_SIZE`: Size of model to use (`small`, `medium`, or `large`, default: `small`)
280279
- `PORT`: Port to run the API server on (default: `8000`)
281280
- `WORKER_COUNT`: Number of worker processes to use (default: `1`)

babeltron/app/models/m2m100.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,10 @@ def get_model_path() -> str:
3333

3434
# First, look for M2M100 model directories
3535
for base_path in possible_paths:
36-
print(f"Checking base path: {base_path}")
3736
if not base_path.exists():
3837
continue
3938

4039
m2m_dirs = list(base_path.glob("m2m*"))
41-
print(f"Checking m2m_dirs path: {m2m_dirs}")
4240
if m2m_dirs:
4341
# Use the first matching directory
4442
logging.info(f"Found M2M100 model directory: {m2m_dirs[0]}")

docker-compose.yml

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,17 +10,26 @@ services:
1010
volumes:
1111
- ./models:/models
1212
environment:
13-
- MODEL_PATH=/models
13+
- MODEL_PATH=${MODEL_PATH:-/models}
1414
- BABELTRON_MODEL_TYPE=${BABELTRON_MODEL_TYPE:-m2m100}
15-
- OTLP_MODE=otlp-grpc
16-
- OTEL_SERVICE_NAME=babeltron
17-
- OTLP_COLLECTOR_HOST=otel-collector
18-
- OTLP_COLLECTOR_PORT=4317
15+
- OTLP_MODE=${OTLP_MODE:-otlp-grpc}
16+
- OTEL_SERVICE_NAME=${OTEL_SERVICE_NAME:-babeltron}
17+
- OTLP_COLLECTOR_HOST=${OTLP_COLLECTOR_HOST:-otel-collector}
18+
- OTLP_COLLECTOR_PORT=${OTLP_COLLECTOR_PORT:-4317}
1919
- AUTH_USERNAME=${AUTH_USERNAME}
2020
- AUTH_PASSWORD=${AUTH_PASSWORD}
21-
- WORKER_COUNT=2
21+
- WORKER_COUNT=${WORKER_COUNT:-2}
2222
- PORT=${PORT:-8000}
23+
- NVIDIA_VISIBLE_DEVICES=${NVIDIA_VISIBLE_DEVICES:-all}
24+
- CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-0}
2325
restart: unless-stopped
26+
deploy:
27+
resources:
28+
reservations:
29+
devices:
30+
- driver: nvidia
31+
count: 1
32+
capabilities: [gpu]
2433
healthcheck:
2534
test: ["CMD", "curl", "-f", "http://localhost:${PORT:-8000}/healthz"]
2635
interval: 30s

etc/otel-collector-config.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@ processors:
1717
spike_limit_percentage: 25
1818

1919
exporters:
20-
otlp:
21-
endpoint: jaeger:4317
20+
jaeger:
21+
endpoint: jaeger:14250
2222
tls:
2323
insecure: true
2424
debug:
@@ -38,4 +38,4 @@ service:
3838
traces:
3939
receivers: [otlp]
4040
processors: [memory_limiter, batch]
41-
exporters: [otlp, debug]
41+
exporters: [jaeger, debug]

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "babeltron"
3-
version = "0.4.0"
3+
version = "0.5.0"
44
dynamic = ["version"]
55
description = "A Python-based REST API that leverages single multilingual models like mBERT to provide efficient text translation services"
66
authors = [

0 commit comments

Comments
 (0)