Skip to content

Commit 8395bb3

Browse files
authored
Merge pull request #168 from intel/update-branch
Add Digital Avatar features (#436)
2 parents 464b8ae + 9ef37a4 commit 8395bb3

File tree

258 files changed

+8092
-20930
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

258 files changed

+8092
-20930
lines changed

usecases/ai/digital-avatar/.gitignore

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
__pycache__
22
.env
3+
.venv
34

45
ffmpeg*/
56
checkpoints
@@ -9,6 +10,23 @@ backend/musetalk/data/avatars
910
backend/wav2lip/wav2lip/results
1011
backend/wav2lip/wav2lip/temp
1112
assets/*
13+
!assets/.gitkeep
1214
weights/*
15+
!weights/.gitkeep
16+
!/weights/checkpoints
17+
/weights/checkpoints/*
18+
!/weights/checkpoints/.gitkeep
1319
backend/liveportrait/templates
14-
/data/*
20+
/data/*
21+
!/data/audio
22+
/data/audio/*
23+
!/data/audio/.gitkeep
24+
!/data/wav2lip
25+
/data/wav2lip/*
26+
!/data/wav2lip/.gitkeep
27+
!/data/sadtalker
28+
/data/sadtalker/*
29+
!/data/sadtalker/.gitkeep
30+
!/data/piper
31+
/data/piper/*
32+
!/data/piper/.gitkeep

usecases/ai/digital-avatar/README.md

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -27,15 +27,15 @@ A digital avatar that utilizes Image to Video, Text To Speech, Speech To Text, a
2727
- CPU: 13th generation Intel Core i5 and above
2828
- GPU: Intel® Arc™ A770 graphics (16GB)
2929
- RAM: 32GB
30-
- DISK: 128GB
30+
- DISK: 256GB
3131

3232
## Application Ports
3333
Please ensure that you have these ports available before running the applications.
3434

3535
| Apps | Port |
3636
|--------------|------|
3737
| Lipsync | 8011 |
38-
| LivePortrait | 8012 |
38+
| RAG | 8012 |
3939
| TTS | 8013 |
4040
| STT | 8014 |
4141
| OLLAMA | 8015 |
@@ -50,15 +50,15 @@ Please ensure that you have these ports available before running the application
5050
1. Refer to [here](../../../README.md#gpu) to install Intel GPU Drivers
5151
1. **Download Wav2Lip Model**: Download the [Wav2Lip model](https://iiitaphyd-my.sharepoint.com/:u:/g/personal/radrabha_m_research_iiit_ac_in/EdjI7bZlgApMqsVoEUUXpLsBxqXbn5z8VTmoxp55YNDcIA?e=n9ljGW) and place the file in the `weights` folder.
5252
1. **Create Avatar**:
53-
1. Place an `image.png` file containing an image of a person (preferably showing at least the upper half of the body) in the assets folder.
54-
2. Place an `idle.mp4` file of a person with some movement such as eye blinking (to be used as a reference) in the assets folder.
53+
1. Place a `video.mp4` file in the `assets` folder. The video should feature an idle person (preferably showing at least the upper half of the body) with subtle movements like blinking or slight body motion, and **no speaking**. Ensure the file is named **`video.mp4`**.
5554

5655
### Setup ENV
5756
1. Create a `.env` file and copy the contents from `.env.template`:
5857
```bash
5958
cp .env.template .env
6059
```
61-
2. Modify the `LLM_MODEL` in the `.env` file. Refer to [Ollama library](https://ollama.com/library) for available models. (Default is `QWEN2.5`).
60+
* Note: Modify the `LLM_MODEL` in the `.env` file in order to change the LLM used by ollama. Refer to [Ollama library](https://ollama.com/library) for available models. (Default is `QWEN2.5`).
61+
6262

6363
### Build Docker Container
6464
```bash
@@ -78,12 +78,14 @@ docker compose up -d
7878
### Device Workload Configurations
7979
You can offload model inference to a specific device by modifying the environment variable setting in the docker-compose.yml file.
8080

81-
| Workload | Environment Variable |Supported Device |
82-
|----------------------|----------------------|-------------------------|
83-
| LLM | - | GPU |
84-
| STT | STT_DEVICE | CPU,GPU,NPU |
85-
| TTS | TTS_DEVICE | CPU |
86-
| Lipsync (Wav2lip) | DEVICE | CPU, GPU |
81+
| Workload | Environment Variable |Supported Device |
82+
|--------------------------------|----------------------|-------------------------|
83+
| LLM | - | GPU(D) |
84+
| STT                             | STT_DEVICE           | CPU(D), GPU, NPU        |
85+
| TTS | TTS_DEVICE | CPU(D) |
86+
| Lipsync (Wav2lip/Sadtalker) | DEVICE | CPU(D-wav2lip), GPU(D-sadtalker) |
87+
88+
* Note: (D) = default device
8789

8890
Example Configuration:
8991

@@ -94,7 +96,7 @@ stt_service:
9496
...
9597
environment:
9698
...
97-
STT_DEVICE=CPU
99+
STT_DEVICE=NPU
98100
...
99101
```
100102

usecases/ai/digital-avatar/backend/liveportrait/liveportrait/pretrained_weights/.gitkeep renamed to usecases/ai/digital-avatar/assets/.gitkeep

File renamed without changes.
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
# Copyright(C) 2024 Intel Corporation
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
import os
5+
6+
from basicsr.archs.rrdbnet_arch import RRDBNet
7+
from basicsr.utils.download_util import load_file_from_url
8+
from RealESRGan.realesrgan import RealESRGANer
9+
from RealESRGan.realesrgan.archs.srvgg_arch import SRVGGNetCompact
10+
11+
12+
def initialize(model_name="RealESRGAN_x2plus", device="cpu"):
    """Build a RealESRGANer super-resolution upsampler for the given model.

    Args:
        model_name (str): Key of one of the supported models below.
            Defaults to "RealESRGAN_x2plus".
        device (str): Inference device forwarded to RealESRGANer
            (e.g. "cpu").

    Returns:
        RealESRGANer: A restorer configured with the selected network and
        its locally cached (or freshly downloaded) weights.

    Raises:
        ValueError: If ``model_name`` is not a supported key.
    """
    # Supported models: release URL(s), network definition, and native scale.
    models = {
        "RealESRGAN_x2plus": {
            "url": ["https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth"],
            "name": "RealESRGAN_x2plus",
            "model": RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=2),
            "netscale": 2,
        },
        "RealESRGAN_x4plus": {
            "url": ["https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth"],
            "name": "RealESRGAN_x4plus",
            "model": RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4),
            "netscale": 4,
        },
        "realesr-animevideov3": {
            "url": ["https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-animevideov3.pth"],
            "name": "realesr-animevideov3",
            "model": SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=16, upscale=4, act_type='prelu'),
            "netscale": 4,
        },
        "realesr-general-x4v3": {
            # The second URL is the denoise (wdn) companion weight; it is
            # only relevant when dni_weight is set, which this function
            # never does, so only url[0] is ever downloaded below.
            "url": [
                "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-x4v3.pth",
                "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-wdn-x4v3.pth",
            ],
            # Fix: was mislabelled "realesr-animevideov3".
            "name": "realesr-general-x4v3",
            "model": SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=32, upscale=4, act_type='prelu'),
            "netscale": 4,
        },
        "realesr-general-x4v3-dn": {
            "url": [
                "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-wdn-x4v3.pth",
            ],
            # Fix: was mislabelled "realesr-animevideov3".
            "name": "realesr-general-x4v3-dn",
            "model": SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=32, upscale=4, act_type='prelu'),
            "netscale": 4,
        },
        "RealESRGAN_x4plus_anime_6B": {
            "url": [
                "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth",
            ],
            "name": "RealESRGAN_x4plus_anime_6B",
            "model": RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=6, num_grow_ch=32, scale=4),
            "netscale": 4,
        },
    }

    if model_name not in models:
        raise ValueError(f"Model name {model_name} not found")

    model = models[model_name]

    # Fix: the existence check previously used a cwd-relative 'weights' path
    # while the download landed in <module dir>/weights, causing a needless
    # re-download whenever the process cwd differed from the module directory.
    # Both the check and the download now use the module-relative path.
    ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
    model_path = os.path.join(ROOT_DIR, 'weights', model_name + '.pth')
    if not os.path.isfile(model_path):
        # model_path will be updated to the downloaded file's location
        model_path = load_file_from_url(
            url=model["url"][0], model_dir=os.path.join(ROOT_DIR, 'weights'), progress=True, file_name=None)

    # dni (deep network interpolation) would blend the wdn weights to control
    # denoise strength; intentionally unused here.
    dni_weight = None

    # restorer
    upsampler = RealESRGANer(
        scale=model["netscale"],
        model_path=model_path,
        dni_weight=dni_weight,
        model=model['model'],
        tile=0,        # 0 = no tiling
        tile_pad=10,
        pre_pad=0,
        half=False,    # fp32 inference
        device=device)

    return upsampler
87+
88+
if __name__ == "__main__":
    # Manual smoke test: build the default upsampler end to end.
    initialize()

usecases/ai/digital-avatar/backend/liveportrait/liveportrait/intel_xpu/attention.py renamed to usecases/ai/digital-avatar/backend/RealESRGan/overwrite/intel_xpu/attention.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
# Copyright (C) 2024 Intel Corporation
2+
# SPDX-License-Identifier: Apache-2.0
3+
14
import os
25
import torch
36
from functools import cache

usecases/ai/digital-avatar/backend/liveportrait/liveportrait/intel_xpu/xpu_override.py renamed to usecases/ai/digital-avatar/backend/RealESRGan/overwrite/intel_xpu/xpu_override.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
# Copyright (C) 2024 Intel Corporation
2+
# SPDX-License-Identifier: Apache-2.0
3+
14
import sys
25
import os
36
# os.add_dll_directory(os.path.join(sys.base_exec_prefix))
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Copyright (C) 2024 Intel Corporation
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
# flake8: noqa
5+
from .archs import *
6+
from .data import *
7+
from .models import *
8+
from .utils import *
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Copyright (C) 2024 Intel Corporation
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
import importlib
5+
from basicsr.utils import scandir
6+
from os import path as osp
7+
8+
# automatically scan and import arch modules for registry
9+
# scan all the files that end with '_arch.py' under the archs folder
10+
arch_folder = osp.dirname(osp.abspath(__file__))
11+
arch_filenames = [osp.splitext(osp.basename(v))[0] for v in scandir(arch_folder) if v.endswith('_arch.py')]
12+
# import all the arch modules
13+
_arch_modules = [importlib.import_module(f'RealESRGan.realesrgan.archs.{file_name}') for file_name in arch_filenames]

0 commit comments

Comments
 (0)