
Commit 625947a

413 user story vision language model microservice with pixtral 12b (#416)
1 parent 02e4814 commit 625947a

File tree

11 files changed: +854 −1 lines changed


usecases/ai/microservices/text-to-image/stable-diffusion-v3.5/backend/client.py

Lines changed: 1 addition & 1 deletion
@@ -89,7 +89,7 @@ def main():
         return
 
     # Step 2: Select the device
-    response = client.make_request("POST", "/pipeline/select-device", {"device": "CPU"})
+    response = client.make_request("POST", "/pipeline/select-device", {"device": "GPU"})
     if response:
         print(response.json())
Lines changed: 102 additions & 0 deletions
@@ -0,0 +1,102 @@
# Use a minimal Debian base image for a smaller container footprint
FROM debian:12-slim

# Avoid interactive prompts during package installation
ARG DEBIAN_FRONTEND=noninteractive

# Define the default allowed models and the default model
ARG ALLOWED_MODELS="pixtral-12b"
ARG DEFAULT_MODEL="pixtral-12b"

# Set default model as a build argument and runtime environment variable
ENV DEFAULT_MODEL=${DEFAULT_MODEL}

# Ensure the container is running as root for package installations
USER root

# Set the working directory early for clarity and organization
WORKDIR /usr/src/app

# Install system dependencies, including Python 3.11 and venv
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ffmpeg \
        wget \
        git \
        gnupg2 \
        libtbb12 \
        python3.11 \
        python3.11-venv \
    && apt-get clean && rm -rf /var/lib/apt/lists/*

# Install Intel GPU drivers with error handling and version pinning
RUN mkdir /tmp/neo \
    && cd /tmp/neo \
    && wget -q https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17791.9/intel-igc-core_1.0.17791.9_amd64.deb \
    && wget -q https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17791.9/intel-igc-opencl_1.0.17791.9_amd64.deb \
    && wget -q https://github.com/intel/compute-runtime/releases/download/24.39.31294.12/intel-level-zero-gpu_1.6.31294.12_amd64.deb \
    && wget -q https://github.com/intel/compute-runtime/releases/download/24.39.31294.12/intel-opencl-icd_24.39.31294.12_amd64.deb \
    && wget -q https://github.com/intel/compute-runtime/releases/download/24.39.31294.12/libigdgmm12_22.5.2_amd64.deb \
    && dpkg -i *.deb \
    && rm -rf /tmp/neo

# Install Intel NPU drivers with error handling and version pinning
RUN mkdir /tmp/npu-driver \
    && cd /tmp/npu-driver \
    && wget -q https://github.com/intel/linux-npu-driver/releases/download/v1.10.0/intel-driver-compiler-npu_1.10.0.20241107-11729849322_ubuntu22.04_amd64.deb \
    && wget -q https://github.com/intel/linux-npu-driver/releases/download/v1.10.0/intel-fw-npu_1.10.0.20241107-11729849322_ubuntu22.04_amd64.deb \
    && wget -q https://github.com/intel/linux-npu-driver/releases/download/v1.10.0/intel-level-zero-npu_1.10.0.20241107-11729849322_ubuntu22.04_amd64.deb \
    && wget -q https://github.com/oneapi-src/level-zero/releases/download/v1.17.6/level-zero_1.17.6+u22.04_amd64.deb \
    && dpkg -i *.deb \
    && rm -rf /tmp/npu-driver

# Create a non-root user for OpenVINO and avoid privilege escalation
RUN groupadd -r openvino && useradd -r -g openvino -G video openvino

# Set up a dedicated home directory for the user
RUN mkdir -p /home/openvino && \
    chown -R openvino:openvino /home/openvino

# Copy application files and adjust permissions (excluding the virtual environments)
COPY . /usr/src/app
RUN find /usr/src/app -not -path "/usr/src/app/*/.venv*" -exec chown openvino:openvino {} + \
    && find /usr/src/app -not -path "/usr/src/app/*/.venv*" -exec chmod 755 {} +

# Remove existing virtual environments to ensure a clean install
RUN rm -rf /usr/src/app/*/.venv

# Create a Python virtual environment for each model and install dependencies
RUN for model in $ALLOWED_MODELS; do \
        python3.11 -m venv /usr/src/app/$model/.venv; \
        /usr/src/app/$model/.venv/bin/python -m pip install --no-cache-dir --upgrade pip; \
        if [ -f "/usr/src/app/$model/requirements.txt" ]; then \
            /usr/src/app/$model/.venv/bin/python -m pip install --no-cache-dir -r /usr/src/app/$model/requirements.txt; \
        fi; \
        chown -R openvino:openvino /usr/src/app/$model/.venv; \
    done

# Set the environment variable for the virtual environment based on the selected model
ENV PATH="/usr/src/app/${SELECTED_MODEL}/.venv/bin:$PATH"

# Switch to the non-root user for security
USER openvino

# Set the working directory based on the selected model
WORKDIR /usr/src/app/${SELECTED_MODEL}

# Expose port for the microservice
EXPOSE 8100

# Allow runtime injection of Hugging Face Token and model selection
# If HF_TOKEN is not provided, models requiring it will fail securely.
CMD ["bash", "-c", \
    "SELECTED_MODEL=${MODEL:-$DEFAULT_MODEL} && \
    export PATH=/usr/src/app/${SELECTED_MODEL}/.venv/bin:$PATH && \
    export HF_TOKEN=${HF_TOKEN:-''} && \
    echo Using model: $SELECTED_MODEL && \
    echo HF_TOKEN set to: ${HF_TOKEN:0:5}****** && \
    /usr/src/app/${SELECTED_MODEL}/.venv/bin/python /usr/src/app/${SELECTED_MODEL}/backend/server.py"]

# Add a basic health check
HEALTHCHECK --interval=30s --timeout=5s --start-period=10s \
    CMD exit 0
Lines changed: 167 additions & 0 deletions
@@ -0,0 +1,167 @@
# Vision Language Model

## Overview
The Vision Language Model microservice is a FastAPI-based API that leverages vision-language models to perform visual question answering and image captioning tasks.
It provides endpoints for managing the pipeline, checking the pipeline status, retrieving the generated text responses, and performing a health check. The service enables AI-powered analysis of images with natural language interactions, allowing users to get detailed descriptions of images and ask questions about image content.

### Supported Models
* Pixtral 12B

### Supported Inference Devices
* CPU
* GPU

---

## Quick Start

### 1. Install Operating System
- Install the latest [Ubuntu 24.04 LTS Desktop](https://releases.ubuntu.com/jammy/). Refer to the [Ubuntu Desktop installation tutorial](https://ubuntu.com/tutorials/install-ubuntu-desktop#1-overview) if needed.

### 2. Install GPU Driver (Optional)
- If you plan to use a GPU for inference, install the appropriate GPU driver:
  - **Intel® Arc™ A-Series Graphics:** [Installation Guide](https://github.com/intel/edge-developer-kit-reference-scripts/tree/main/gpu/arc/dg2)
  - **Intel® Data Center GPU Flex Series:** [Installation Guide](https://github.com/intel/edge-developer-kit-reference-scripts/tree/main/gpu/flex/ats)

### 3. Install Docker Engine
- Follow the official [Docker installation guide](https://docs.docker.com/engine/install/) to set up Docker Engine on your system.

### 4. Build and Run the Docker Container
- Build the vision language model docker image.
```bash
docker build --network=host -t vlm .
```
- Export the required environment variables and run the container; once it is up, you can verify the service with the sketch shown after this step.
```bash
# Select your vision language model, e.g. pixtral-12b.
export VLM_MODEL=pixtral-12b

# Insert your Hugging Face login token
export HF_TOKEN=<your_huggingface_token>

# Run the container
export RENDER_GROUP_ID=$(getent group render | cut -d: -f3)

docker run -it --rm \
  --name vlm-container \
  --group-add $RENDER_GROUP_ID \
  --device /dev/dri:/dev/dri \
  -p 8100:8100 \
  -e MODEL=$VLM_MODEL \
  -e HF_TOKEN=$HF_TOKEN \
  -v $(pwd)/data:/usr/src/app/data \
  vlm
```
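Once the container is running, a quick way to confirm that the microservice is reachable is to call its health endpoint. The snippet below is a minimal sketch, not part of the repository: it assumes the third-party `requests` package is installed on the host and that the service is published on `localhost:8100`.

```python
import requests

# Query the microservice's health endpoint on the published port.
response = requests.get("http://localhost:8100/health", timeout=5)
print(response.json())  # expected: {"status": "healthy"}
```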
## Development

---

### 1. Setup the vision language model server

- Change the current directory to the selected model. For example:
```bash
cd pixtral-12b
```

- Execute the setup script.
```bash
./setup.sh
```

### 2. Verify the server by running the example (optional)
```bash
./run.sh
```

---

## Routes

### 1. **POST /pipeline/select-device**
- **Description**: Selects and compiles the device for the pipeline.
- **Request Body**:
```
{
  "device": "<device_name>"
}
```
- **Response**:
  - Success:
```
{
  "status": "success",
  "message": "Pipeline prepared on <device_name>."
}
```
  - Error:
```
{
  "status": "error",
  "message": "<error_message>"
}
```
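For illustration, this call can be issued from Python. The following is a minimal sketch, not repository code: it assumes the service is reachable on `localhost:8100`, the `requests` package is installed, and `"GPU"` stands in for whichever supported device you want.

```python
import requests

# Ask the microservice to prepare and compile the pipeline on a specific device.
resp = requests.post(
    "http://localhost:8100/pipeline/select-device",
    json={"device": "GPU"},  # or "CPU"
    timeout=600,  # assumption: pipeline compilation can take a while
)
print(resp.json())  # e.g. {"status": "success", "message": "Pipeline prepared on GPU."}
```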
### 2. **POST /pipeline/run**
- **Description**: Starts the pipeline execution asynchronously in the background.
- **Request Body**:
```
{
  "prompt": "<your_prompt>"
}
```
- **Response**:
  - Success:
```
{
  "status": "success",
  "message": "Pipeline execution started in background."
}
```
  - Error:
```
{
  "status": "error",
  "message": "Pipeline execution is already running or pipeline is not initialized."
}
```
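A minimal sketch of starting a run from Python, under the same assumptions as above (service on `localhost:8100`, `requests` installed); the prompt text is only an example.

```python
import requests

# Kick off pipeline execution; the call returns immediately because the
# actual inference runs in the background.
resp = requests.post(
    "http://localhost:8100/pipeline/run",
    json={"prompt": "Describe the content of the image."},
    timeout=30,
)
print(resp.json())
```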
### 3. **GET /pipeline/status**
- **Description**: Checks the current status of the pipeline.
- **Response**:
```
{
  "running": <true or false>,
  "completed": <true or false>
}
```

### 4. **GET /pipeline/answer**
- **Description**: Retrieves the generated answer once the pipeline has completed execution. This endpoint is available only when the pipeline has finished processing.
- **Response**:
  - Success:
```
{
  "status": "success",
  "answer": "<generated_answer>"
}
```
  - Error:
```
{
  "status": "error",
  "message": "Pipeline execution is not yet complete."
}
```
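Because `/pipeline/run` returns before inference finishes, a client typically polls `/pipeline/status` and only then calls `/pipeline/answer`. The following is a minimal polling sketch under the same assumptions as the earlier examples (service on `localhost:8100`, `requests` installed).

```python
import time
import requests

BASE_URL = "http://localhost:8100"

# Poll the status endpoint until the background run reports completion.
while True:
    status = requests.get(f"{BASE_URL}/pipeline/status", timeout=5).json()
    if status.get("completed"):
        break
    time.sleep(1)

# Retrieve the generated answer once the pipeline has completed.
answer = requests.get(f"{BASE_URL}/pipeline/answer", timeout=5).json()
print(answer.get("answer"))
```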
### 5. **GET /health**
- **Description**: A simple health check endpoint to ensure that the API is up and running.
- **Response**:
```
{
  "status": "healthy"
}
```
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
.venv
2+
pixtral-12b
3+
kernel.errors.txt

0 commit comments
