Skip to content

Commit 296df7d

Browse files
Support other models in multimodal generator (#274)
* Fix CI for multimodal generator * Allowed server to accept different models * Replaced prints with logger
1 parent afd4530 commit 296df7d

File tree

4 files changed

+43
-15
lines changed

4 files changed

+43
-15
lines changed

ai_ref_kits/multimodal_ai_visual_generator/README.md

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -171,12 +171,26 @@ This app has two components: a FastAPI backend and a Streamlit frontend.
171171

172172
### Step 1: Run FastAPI (in Terminal 1)
173173

174+
The FastAPI backend can be configured using environment variables to specify which models to use:
175+
176+
- `IMAGE_MODEL_TYPE`: The type of image generation model to use (default: "flux.1-schnell")
177+
- `LLM_MODEL_TYPE`: The type of language model to use (default: "qwen2-7B")
178+
- `MODEL_PRECISION`: The precision to use for both models (default: "int4")
179+
180+
You can set these variables when running the application:
181+
174182
```bash
175183
cd openvino_build_deploy/ai_ref_kits/multimodal_ai_visual_generator
176184
source venv/bin/activate # On Windows: venv\Scripts\activate
185+
186+
# Run with default values
177187
uvicorn main:app --host 0.0.0.0 --port 8000
188+
189+
# Or run with custom model configuration
190+
IMAGE_MODEL_TYPE="your-image-model" LLM_MODEL_TYPE="your-llm-model" MODEL_PRECISION="int4" uvicorn main:app --host 0.0.0.0 --port 8000
178191
```
179-
> **Note:** If you're using different models, update the paths in `main.py` accordingly.
192+
193+
If no environment variables are set, the application will use the default values.
180194

181195
### Step 2: Run Streamlit UI (in Terminal 2)
182196

@@ -205,7 +219,7 @@ Branding mode:
205219
## Additional Resources
206220

207221
- Learn more about [OpenVINO](https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/overview.html)
208-
- Explore [OpenVINOs documentation](https://docs.openvino.ai/2024/home.html)
222+
- Explore [OpenVINO's documentation](https://docs.openvino.ai/2024/home.html)
209223

210224
<p align="right"><a href="#top">Back to top ⬆️</a></p>
211225

ai_ref_kits/multimodal_ai_visual_generator/ci/test.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import time
33
import requests
44
import sys
5+
import os
56
from pathlib import Path
67
import logging
78

@@ -19,8 +20,8 @@
1920

2021
# ----- Configuration -----
2122
MODEL_DIR = Path("models")
22-
LLM_MODEL_TYPE = "qwen2-7B"
23-
IMAGE_MODEL_TYPE = "flux.1-schnell"
23+
LLM_MODEL_TYPE = "tiny-llama-1b-chat"
24+
IMAGE_MODEL_TYPE = "lcm"
2425
PRECISION = "int4"
2526

2627
# ----- Step 1: Export Models if Needed -----
@@ -30,7 +31,17 @@
3031

3132
# ----- Step 2: Launch FastAPI Backend -----
3233
logger.info("Launching FastAPI server...")
33-
process = subprocess.Popen([sys.executable, "-m", "uvicorn", "main:app", "--host", "127.0.0.1", "--port", "8000"])
34+
env = os.environ.copy()
35+
env.update({
36+
"IMAGE_MODEL_TYPE": IMAGE_MODEL_TYPE,
37+
"LLM_MODEL_TYPE": LLM_MODEL_TYPE,
38+
"MODEL_PRECISION": PRECISION
39+
})
40+
41+
process = subprocess.Popen(
42+
[sys.executable, "-m", "uvicorn", "main:app", "--host", "127.0.0.1", "--port", "8000"],
43+
env=env
44+
)
3445

3546
try:
3647
# Wait up to ~130 seconds (130 retries x 1s sleep) for FastAPI server to come up

ai_ref_kits/multimodal_ai_visual_generator/convert_and_optimize_text2image.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
import subprocess
33
import platform
44
from pathlib import Path
5-
import json
65
import os
76
import logging
87

@@ -16,6 +15,7 @@
1615
"stable-diffusionv3-large": "stabilityai/stable-diffusion-3.5-large",
1716
"stable-diffusionv3-medium": "stabilityai/stable-diffusion-3.5-medium",
1817
"stable-diffusion-2-1": "stabilityai/stable-diffusion-2-1",
18+
"lcm": "SimianLuo/LCM_Dreamshaper_v7",
1919
}
2020

2121
CRITICAL_FILES = [

ai_ref_kits/multimodal_ai_visual_generator/main.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
import base64
1111
import sys
1212
import yaml
13-
import subprocess
1413
import openvino_genai as ov_genai
1514
import logging
1615
import random
@@ -47,10 +46,14 @@
4746
PROJECT_ROOT = Path(__file__).resolve().parent
4847
CONFIG_PATH = PROJECT_ROOT / "config" / "illustration.yaml"
4948

49+
# Get model types from environment variables with defaults
50+
IMAGE_MODEL_TYPE = os.getenv("IMAGE_MODEL_TYPE", "flux.1-schnell")
51+
LLM_MODEL_TYPE = os.getenv("LLM_MODEL_TYPE", "qwen2-7B")
52+
PRECISION = os.getenv("MODEL_PRECISION", "int4")
5053

51-
IMAGE_MODEL_TYPE = "flux.1-schnell"
52-
LLM_MODEL_TYPE = "qwen2-7B"
53-
PRECISION = "int4"
54+
logger.info(f"Using Image Model Type: {IMAGE_MODEL_TYPE}")
55+
logger.info(f"Using LLM Model Type: {LLM_MODEL_TYPE}")
56+
logger.info(f"Using Model Precision: {PRECISION}")
5457

5558
image_model_dir = PROJECT_ROOT / "models" / f"{IMAGE_MODEL_TYPE}-{PRECISION.upper()}"
5659
llm_model_dir = PROJECT_ROOT / "models" / f"{LLM_MODEL_TYPE}-{PRECISION.upper()}"
@@ -206,9 +209,9 @@ def callback(step, num_steps, latent):
206209

207210
# ---------- Server Start Print ----------
208211
if image_pipe or llm_pipe:
209-
print("FastAPI backend is running.")
210-
print("In a separate terminal, start the Streamlit app using:")
211-
print("streamlit run streamlit_app.py")
212+
logger.info("Demo is ready!")
213+
logger.info("FastAPI backend is running.")
214+
logger.info("In a separate terminal, start the Streamlit app using: streamlit run streamlit_app.py")
212215
else:
213-
print("FastAPI backend is running, but no models were loaded.")
214-
print("Please export models before running the Streamlit app.")
216+
logger.warning("FastAPI backend is running, but no models were loaded.")
217+
logger.warning("Please export models before running the Streamlit app.")

0 commit comments

Comments (0)