1+ import os
2+
3+ import torch
14from fastapi import APIRouter , HTTPException , status
5+ from fastapi_cache .decorator import cache
26from pydantic import BaseModel , Field
37from transformers import M2M100ForConditionalGeneration , M2M100Tokenizer
4- import os
5- import torch
68
7- from babeltron .app .utils import get_model_path
9+ from babeltron .app .utils import ORJsonCoder , cache_key_builder , get_model_path
810
# All endpoints in this module are grouped under the "Translation" tag
# in the generated OpenAPI documentation.
router = APIRouter(tags=["Translation"])
# FP16 model compression is opt-out: enabled unless the env var is set to a
# non-truthy value. Accepted truthy spellings: "true", "1", "yes" (any case).
MODEL_COMPRESSION_ENABLED = (
    os.environ.get("MODEL_COMPRESSION_ENABLED", "true").lower()
    in ("true", "1", "yes")
)

# Time-to-live for cached translation responses, in seconds (default: 1 hour).
CACHE_TTL_SECONDS = int(os.environ.get("CACHE_TTL_SECONDS", "3600"))
1217
1318try :
1419 MODEL_PATH = get_model_path ()
1924 if MODEL_COMPRESSION_ENABLED and torch .cuda .is_available ():
2025 print ("Applying FP16 model compression" )
2126 model = model .half () # Convert to FP16 precision
22- model = model .to (' cuda' ) # Move to GPU
27+ model = model .to (" cuda" ) # Move to GPU
2328 elif MODEL_COMPRESSION_ENABLED :
2429 print ("FP16 compression enabled but GPU not available, using CPU" )
2530 else :
@@ -71,6 +76,7 @@ class TranslationResponse(BaseModel):
7176 response_description = "The translated text in the target language" ,
7277 status_code = status .HTTP_200_OK ,
7378)
79+ @cache (expire = CACHE_TTL_SECONDS , key_builder = cache_key_builder , coder = ORJsonCoder )
7480async def translate (request : TranslationRequest ):
7581 if model is None or tokenizer is None :
7682 raise HTTPException (
@@ -84,17 +90,19 @@ async def translate(request: TranslationRequest):
8490
8591 # Move input to GPU if model is on GPU
8692 if torch .cuda .is_available () and next (model .parameters ()).is_cuda :
87- encoded_text = {k : v .to (' cuda' ) for k , v in encoded_text .items ()}
93+ encoded_text = {k : v .to (" cuda" ) for k , v in encoded_text .items ()}
8894
8995 generated_tokens = model .generate (
9096 ** encoded_text , forced_bos_token_id = tokenizer .get_lang_id (request .tgt_lang )
9197 )
92- translation = tokenizer .batch_decode (generated_tokens , skip_special_tokens = True )[0 ]
98+ translation = tokenizer .batch_decode (
99+ generated_tokens , skip_special_tokens = True
100+ )[0 ]
93101 return {"translation" : translation }
94102 except Exception as e :
95103 raise HTTPException (
96104 status_code = status .HTTP_500_INTERNAL_SERVER_ERROR ,
97- detail = f"Error during translation: { str (e )} "
105+ detail = f"Error during translation: { str (e )} " ,
98106 )
99107
100108
0 commit comments