Merge pull request #91 from intel/update-branch

gooishin · web-flow · commit 4426c0d2ae31 · 2025-01-02T10:58:20.000+08:00
feat: update Dockerfile and requirements for speech-to-text microservice to use OpenVINO 2024.6.0; add README for setup instructions (#279)
diff --git a/usecases/ai/microservices/speech-to-text/Dockerfile b/usecases/ai/microservices/speech-to-text/Dockerfile
@@ -1,7 +1,7 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
-FROM openvino/ubuntu22_dev:2024.5.0
+FROM openvino/ubuntu22_dev:2024.6.0
 
 USER root
 
diff --git a/usecases/ai/microservices/speech-to-text/README.md b/usecases/ai/microservices/speech-to-text/README.md
@@ -0,0 +1,55 @@
+# Automatic Speech Recognition
+
+## Requirements
+
+### Validated Hardware Requirements
+- **CPU:** 13th Generation Intel® Core™ processors or newer
+- **GPU:** Intel® Arc™ graphics
+- **RAM:** 32GB (may vary based on model size)
+- **Disk:** 128GB (may vary based on model size)
+
+### Supported Inference Devices
+* CPU
+* GPU
+* NPU
+
+## Quick Start
+### 1. Install Operating System
+Install the latest [Ubuntu 22.04 LTS Desktop](https://releases.ubuntu.com/jammy/). Refer to the [Ubuntu Desktop installation tutorial](https://ubuntu.com/tutorials/install-ubuntu-desktop#1-overview) if needed.
+
+### 2. Install GPU Driver (Optional)
+If you plan to use a GPU for inference, install the appropriate GPU driver:
+- **Intel® Arc™ A-Series Graphics:** [Installation Guide](https://github.com/intel/edge-developer-kit-reference-scripts/tree/main/gpu/arc/dg2)
+- **Intel® Data Center GPU Flex Series:** [Installation Guide](https://github.com/intel/edge-developer-kit-reference-scripts/tree/main/gpu/flex/ats)
+
+### 3. Set Up Docker
+Follow the [official Docker Engine installation guide](https://docs.docker.com/engine/install/) to install Docker and Docker Compose.
+
+### 4. Build the Automatic Speech Recognition Docker Image
+```bash
+docker build -t automatic-speech-recognition .
+```
+
+### 5. Run the Automatic Speech Recognition Container
+* **CPU**
+```bash
+docker run -it --rm \
+    -p 5996:5996 \
+    -e DEFAULT_MODEL_ID=openai/whisper-tiny \
+    -e STT_DEVICE=CPU \
+    -v ./data:/usr/src/app/data \
+    automatic-speech-recognition
+```
+
+* **GPU**
+```bash
+export RENDER_GROUP_ID=$(getent group render | cut -d: -f3)
+docker run -it --rm \
+    --group-add $RENDER_GROUP_ID \
+    --device /dev/dri:/dev/dri \
+    -p 5996:5996 \
+    -e DEFAULT_MODEL_ID=openai/whisper-tiny \
+    -e STT_DEVICE=GPU \
+    -v ./data:/usr/src/app/data \
+    automatic-speech-recognition
+```
diff --git a/usecases/ai/microservices/speech-to-text/requirements.txt b/usecases/ai/microservices/speech-to-text/requirements.txt
@@ -1,9 +1,9 @@
 fastapi[all]==0.115.6
 uvicorn==0.32.1
-
 soundfile==0.12.1
 pydub==0.25.1
 
-openvino==2024.5.0
-openvino_genai==2024.5.0
-optimum-intel[openvino,nncf]==1.21.0
+--extra-index-url https://download.pytorch.org/whl/cpu
+optimum-intel[openvino,nncf]==1.21.0
+openvino==2024.6.0
+openvino_genai==2024.6.0