Skip to content

Commit efb2c75

Browse files
committed
Update Dockerfile
Add OpenAI-compatible embedding API request handling
1 parent 56ee115 commit efb2c75

File tree

4 files changed

+208
-97
lines changed

4 files changed

+208
-97
lines changed

Dockerfile

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1+
2+
FROM python:3.10-slim AS builder
3+
4+
# Install dependencies
5+
COPY app/requirements.txt .
6+
RUN pip install --prefix=/install --no-cache-dir -r requirements.txt
7+
8+
19
FROM python:3.10-slim
210

311
# Set environment variables
@@ -7,16 +15,12 @@ ENV MODEL_CACHE_DIR /app/model_cache
715

816
# Create and set working directory
917
WORKDIR /app
10-
11-
# Install dependencies
12-
COPY app/requirements.txt .
13-
RUN pip install --no-cache-dir -r requirements.txt
14-
1518
# Copy application code
16-
COPY app/main.py .
19+
COPY app ./app
1720

21+
COPY --from=builder /install /usr/local
1822
# Expose the port the app runs on
1923
EXPOSE 8000
2024

2125
# Command to run the application
22-
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
26+
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]

app/embedding_service.py

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
import os
2+
import io
3+
from typing import Literal, Optional, Union
4+
5+
import requests
6+
from PIL import Image
7+
from pydantic import BaseModel, Field
8+
from app.model_service import loadModel
9+
10+
# --- Configuration ---
11+
MODEL_NAME = os.environ.get("MODEL_NAME", "clip-ViT-B-32")
12+
# TRANSFORMERS_CACHE is set via environment variable in compose.yml
13+
# and defaults to /app/model_cache in the Dockerfile
14+
MODEL_CACHE_DIR = os.environ.get("TRANSFORMERS_CACHE", "/app/model_cache")
15+
16+
# --- Initialization ---
17+
default_model = loadModel(MODEL_NAME)
18+
19+
# --- Pydantic Schemas ---
20+
class TextEmbedRequest(BaseModel):
    """Request body for the native /embed/text endpoint."""
    # Batch of text strings to embed; an empty list yields an empty response.
    texts: list[str]

class ImageEmbedRequest(BaseModel):
    """Request body for the native /embed/image endpoint."""
    # Batch of image URLs to download and embed.
    image_urls: list[str]

class EmbeddingResponse(BaseModel):
    """Native response: one embedding vector per input, in input order."""
    # Each inner list is one embedding vector of floats.
    embeddings: list[list[float]]
    # Name of the model that produced the embeddings.
    model: str
29+
30+
31+
# --- OpenAI Embedding Response Schema ---
32+
# --- OpenAI Embedding Response Schema ---
class OpenAIEmbeddingRequest(BaseModel):
    """Request body mirroring OpenAI's /v1/embeddings API."""
    # Model to use; defaults to this service's configured MODEL_NAME.
    model: Optional[str] = Field(MODEL_NAME, example="text-embedding-ada-002")
    # A single string or a batch of strings (interpreted here as image URLs).
    input: Union[str, list[str]]
    # Only "float" encoding is supported.
    encoding_format: Optional[Literal["float"]] = "float"


class Usage(BaseModel):
    """Token accounting block, kept for OpenAI response compatibility."""
    # NOTE(review): this service reports 0 for both — no tokenizer is involved.
    prompt_tokens: int
    total_tokens: int


class EmbeddingItem(BaseModel):
    """A single embedding entry in an OpenAI-style response."""
    # Always the literal string "embedding" in practice.
    object: str = Field(..., example="embedding")
    embedding: list[float]
    # Position of this embedding in the input batch.
    index: int


class OpenAIEmbeddingResponse(BaseModel):
    """Top-level OpenAI-style embeddings response."""
    # Always the literal string "list" in practice.
    object: str = Field(..., example="list")
    data: list[EmbeddingItem]
    model: str
    usage: Usage
54+
55+
56+
# --- Utility Functions ---
57+
def get_image_from_url(url: str) -> Image.Image:
    """Download an image from *url* and return it as a PIL Image object.

    Args:
        url: HTTP(S) URL of the image; fetched with a 10-second timeout.

    Raises:
        Exception: if the download fails (network/HTTP error) or the
            downloaded bytes cannot be parsed as an image.
    """
    try:
        response = requests.get(url, stream=True, timeout=10)
        response.raise_for_status()
        return Image.open(io.BytesIO(response.content))
    except requests.exceptions.RequestException as e:
        # Chain the original error so the root cause stays in the traceback.
        raise Exception(f"Failed to download image from {url}: {e}") from e
    except Exception as e:
        raise Exception(f"Failed to process image from {url}: {e}") from e
68+
69+
def embed_text(texts: list[str]) -> EmbeddingResponse:
    """Generates embeddings for a list of text strings."""
    if default_model is None:
        raise Exception("Model not loaded.")

    # Short-circuit on an empty batch — nothing to encode.
    if not texts:
        return EmbeddingResponse(embeddings=[], model=MODEL_NAME)

    # Encode, then convert the numpy matrix to plain lists for JSON output.
    vectors = default_model.encode(texts, convert_to_numpy=True)
    return EmbeddingResponse(embeddings=vectors.tolist(), model=MODEL_NAME)
84+
85+
def embed_image(image_urls: list[str]) -> EmbeddingResponse:
    """Generates embeddings for a list of image URLs."""
    if default_model is None:
        raise Exception("Model not loaded.")

    # Short-circuit on an empty batch — nothing to fetch or encode.
    if not image_urls:
        return EmbeddingResponse(embeddings=[], model=MODEL_NAME)

    # Fetch every image up front; any failure aborts the whole batch.
    images = [get_image_from_url(url) for url in image_urls]

    # Multimodal sentence-transformers models accept PIL images directly in
    # encode(); convert the resulting numpy matrix to lists for JSON output.
    vectors = default_model.encode(images, convert_to_numpy=True)
    return EmbeddingResponse(embeddings=vectors.tolist(), model=MODEL_NAME)
107+
108+
109+
110+
def open_ai_embed_image(image_urls: list[str], model_name: str) -> OpenAIEmbeddingResponse:
    """Generate OpenAI-style embeddings for a list of image URLs.

    Args:
        image_urls: URLs of the images to embed; may be empty.
        model_name: Model to use; falls back to MODEL_NAME when falsy, so the
            response's required ``model: str`` field is always valid.

    Raises:
        Exception: if no usable model can be obtained, or an image fails to
            download/decode.
    """
    # BUGFIX: a None/empty model_name previously flowed into the response and
    # failed pydantic validation (model is a required str). Fall back early.
    model_name = model_name or MODEL_NAME

    if not image_urls:
        # BUGFIX: the empty-input response must still carry a Usage object;
        # omitting it made pydantic reject the response.
        return OpenAIEmbeddingResponse(
            object="list",
            data=[],
            model=model_name,
            usage=Usage(prompt_tokens=0, total_tokens=0),
        )

    # Fetch every image up front; any failure aborts the whole batch.
    images = [get_image_from_url(url) for url in image_urls]

    # BUGFIX: compare by value, not identity — `is not` on strings is
    # unreliable and could force a reload even for the default model.
    # Also reload when the default model failed to load at startup.
    if model_name == MODEL_NAME and default_model is not None:
        target_model = default_model
    else:
        target_model = loadModel(model_name)
    if target_model is None:
        raise Exception("Model not loaded, and no model is provided.")

    # The model.encode method handles both text and image inputs for
    # multimodal models; convert the numpy matrix to JSON-serializable lists.
    embeddings = target_model.encode(images, convert_to_numpy=True)

    data = [
        EmbeddingItem(object="embedding", embedding=vector, index=i)
        for i, vector in enumerate(embeddings.tolist())
    ]
    return OpenAIEmbeddingResponse(
        object="list",
        data=data,
        model=model_name,
        usage=Usage(prompt_tokens=0, total_tokens=0),
    )

app/main.py

Lines changed: 28 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -1,112 +1,50 @@
11
import os
2-
import io
3-
import requests
4-
import numpy as np
5-
from PIL import Image
62
from fastapi import FastAPI, HTTPException
7-
from pydantic import BaseModel
8-
from sentence_transformers import SentenceTransformer
9-
import torch
10-
11-
# --- Configuration ---
12-
MODEL_NAME = os.environ.get("MODEL_NAME", "clip-ViT-B-32")
13-
# TRANSFORMERS_CACHE is set via environment variable in compose.yml
14-
# and defaults to /app/model_cache in the Dockerfile
15-
MODEL_CACHE_DIR = os.environ.get("TRANSFORMERS_CACHE", "/app/model_cache")
16-
17-
# --- Initialization ---
18-
# Initialize the model globally to load it once on startup
19-
try:
20-
# 1. Check if CUDA (GPU) is available
21-
if torch.cuda.is_available():
22-
device = 'cuda'
23-
print("GPU is available. Using GPU.")
24-
else:
25-
device = 'cpu'
26-
print("GPU not available. Using CPU.")
27-
# The model will be downloaded to MODEL_CACHE_DIR if not present
28-
model = SentenceTransformer(MODEL_NAME, cache_folder=MODEL_CACHE_DIR, device=device)
29-
print(f"Successfully loaded model: {MODEL_NAME} from {MODEL_CACHE_DIR}")
30-
except Exception as e:
31-
print(f"Error loading model {MODEL_NAME}: {e}")
32-
# In a real service, you might want to exit or raise an error here
33-
model = None
3+
from app.embedding_service import (
4+
embed_text,
5+
embed_image,
6+
TextEmbedRequest,
7+
ImageEmbedRequest,
8+
EmbeddingResponse,
9+
default_model,
10+
MODEL_NAME, OpenAIEmbeddingResponse, OpenAIEmbeddingRequest, open_ai_embed_image
11+
)
3412

3513
app = FastAPI(
3614
title="Multimodal Embedding Service",
3715
description=f"HTTP service for generating text and image embeddings using {MODEL_NAME}.",
3816
version="1.0.0"
3917
)
4018

41-
# --- Pydantic Schemas ---
42-
class TextEmbedRequest(BaseModel):
43-
texts: list[str]
44-
45-
class ImageEmbedRequest(BaseModel):
46-
image_urls: list[str]
47-
48-
class EmbeddingResponse(BaseModel):
49-
embeddings: list[list[float]]
50-
model: str
51-
52-
# --- Utility Functions ---
53-
def get_image_from_url(url: str) -> Image.Image:
54-
"""Downloads an image from a URL and returns a PIL Image object."""
55-
try:
56-
response = requests.get(url, stream=True, timeout=10)
57-
response.raise_for_status()
58-
image = Image.open(io.BytesIO(response.content))
59-
return image
60-
except requests.exceptions.RequestException as e:
61-
raise HTTPException(status_code=400, detail=f"Failed to download image from {url}: {e}")
62-
except Exception as e:
63-
raise HTTPException(status_code=400, detail=f"Failed to process image from {url}: {e}")
64-
6519
# --- Endpoints ---
6620

6721
@app.get("/health")
async def health_check():
    # Liveness/readiness probe: report 503 until the default model is loaded.
    if default_model is None:
        raise HTTPException(status_code=503, detail="Model not loaded.")
    return {"status": "ok", "default_model": MODEL_NAME}
7226

7327
@app.post("/embed/text", response_model=EmbeddingResponse)
async def embed_text_endpoint(request: TextEmbedRequest):
    """Generates embeddings for a list of text strings."""
    try:
        return embed_text(request.texts)
    except Exception as e:
        # Service-layer failures (model missing, encode errors) become 500s.
        raise HTTPException(status_code=500, detail=str(e))
8934

9035
@app.post("/embed/image", response_model=EmbeddingResponse)
async def embed_image_endpoint(request: ImageEmbedRequest):
    """Generates embeddings for a list of image URLs."""
    try:
        return embed_image(request.image_urls)
    except Exception as e:
        # Download/decode/model failures become 500s with the error message.
        raise HTTPException(status_code=500, detail=str(e))
9842

99-
images = []
100-
for url in request.image_urls:
101-
# Download and process image
102-
image = get_image_from_url(url)
103-
images.append(image)
10443

105-
# Encode the images
106-
# The model.encode method handles both text and image inputs for multimodal models
107-
embeddings = model.encode(images, convert_to_numpy=True)
108-
109-
# Convert numpy array to list of lists for JSON serialization
110-
embeddings_list = embeddings.tolist()
111-
112-
return EmbeddingResponse(embeddings=embeddings_list, model=MODEL_NAME)
44+
@app.post("/v1/embeddings", response_model=OpenAIEmbeddingResponse)
async def openai_embedding_endpoint(request: OpenAIEmbeddingRequest):
    """OpenAI-compatible embeddings endpoint (inputs are image URLs here)."""
    try:
        # BUGFIX: the OpenAI schema allows `input` to be a single string;
        # passing it through unchanged made the service iterate over the
        # string's characters. Normalize to a list first.
        urls = [request.input] if isinstance(request.input, str) else request.input
        return open_ai_embed_image(image_urls=urls, model_name=request.model)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

app/model_service.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
import os
2+
3+
import torch
4+
from sentence_transformers import SentenceTransformer
5+
DEFAULT_MODEL_NAME = os.environ.get("MODEL_NAME", "clip-ViT-B-32")
6+
MODEL_CACHE_DIR = os.environ.get("TRANSFORMERS_CACHE", "/app/model_cache")
7+
def loadModel(modelName: str):
    """Load a SentenceTransformer model onto GPU if available, else CPU.

    Args:
        modelName: Name of the model to load; falls back to
            DEFAULT_MODEL_NAME when falsy (None or empty string).

    Returns:
        The loaded SentenceTransformer instance, or None if loading failed
        (callers treat None as "model unavailable").
    """
    final_model_name = modelName or DEFAULT_MODEL_NAME

    try:
        # Prefer the GPU when torch can see one; otherwise fall back to CPU.
        if torch.cuda.is_available():
            device = 'cuda'
            print("GPU is available. Using GPU.")
        else:
            device = 'cpu'
            print("GPU not available. Using CPU.")
        # Downloads into MODEL_CACHE_DIR on first use, then reuses the cache.
        model = SentenceTransformer(final_model_name, cache_folder=MODEL_CACHE_DIR, device=device)
        print(f"Successfully loaded model: {final_model_name} from {MODEL_CACHE_DIR}")
        return model
    except Exception as e:
        print(f"Error loading model {final_model_name}: {e}")
        # BUGFIX: the previous code assigned `model = None` and fell off the
        # end of the function; make the failure return explicit.
        return None

0 commit comments

Comments
 (0)