Skip to content

Commit 8395bb3

Browse files
authored
Merge pull request #168 from intel/update-branch
Add Digital Avatar features (#436)
2 parents 464b8ae + 9ef37a4 commit 8395bb3

File tree

258 files changed

+8092
-20930
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

258 files changed

+8092
-20930
lines changed

usecases/ai/digital-avatar/.gitignore

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
__pycache__
22
.env
3+
.venv
34

45
ffmpeg*/
56
checkpoints
@@ -9,6 +10,23 @@ backend/musetalk/data/avatars
910
backend/wav2lip/wav2lip/results
1011
backend/wav2lip/wav2lip/temp
1112
assets/*
13+
!assets/.gitkeep
1214
weights/*
15+
!weights/.gitkeep
16+
!/weights/checkpoints
17+
/weights/checkpoints/*
18+
!/weights/checkpoints/.gitkeep
1319
backend/liveportrait/templates
14-
/data/*
20+
/data/*
21+
!/data/audio
22+
/data/audio/*
23+
!/data/audio/.gitkeep
24+
!/data/wav2lip
25+
/data/wav2lip/*
26+
!/data/wav2lip/.gitkeep
27+
!/data/sadtalker
28+
/data/sadtalker/*
29+
!/data/sadtalker/.gitkeep
30+
!/data/piper
31+
/data/piper/*
32+
!/data/piper/.gitkeep

usecases/ai/digital-avatar/README.md

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -27,15 +27,15 @@ A digital avatar that utilizes Image to Video, Text To Speech, Speech To Text, a
2727
- CPU: 13th generation Intel Core i5 and above
2828
- GPU: Intel® Arc™ A770 graphics (16GB)
2929
- RAM: 32GB
30-
- DISK: 128GB
30+
- DISK: 256GB
3131

3232
## Application Ports
3333
Please ensure that you have these ports available before running the applications.
3434

3535
| Apps | Port |
3636
|--------------|------|
3737
| Lipsync | 8011 |
38-
| LivePortrait | 8012 |
38+
| RAG | 8012 |
3939
| TTS | 8013 |
4040
| STT | 8014 |
4141
| OLLAMA | 8015 |
@@ -50,15 +50,15 @@ Please ensure that you have these ports available before running the application
5050
1. Refer to [here](../../../README.md#gpu) to install Intel GPU Drivers
5151
1. **Download Wav2Lip Model**: Download the [Wav2Lip model](https://iiitaphyd-my.sharepoint.com/:u:/g/personal/radrabha_m_research_iiit_ac_in/EdjI7bZlgApMqsVoEUUXpLsBxqXbn5z8VTmoxp55YNDcIA?e=n9ljGW) and place the file in the `weights` folder.
5252
1. **Create Avatar**:
53-
1. Place an `image.png` file containing an image of a person (preferably showing at least the upper half of the body) in the assets folder.
54-
2. Place an `idle.mp4` file of a person with some movement such as eye blinking (to be used as a reference) in the assets folder.
53+
1. Place a `video.mp4` file in the `assets` folder. The video should feature an idle person (preferably showing at least the upper half of the body) with subtle movements like blinking or slight body motion, and **no speaking**. Ensure the file is named **`video.mp4`**.
5554

5655
### Setup ENV
5756
1. Create a `.env` file and copy the contents from `.env.template`:
5857
```bash
5958
cp .env.template .env
6059
```
61-
2. Modify the `LLM_MODEL` in the `.env` file. Refer to [Ollama library](https://ollama.com/library) for available models. (Default is `QWEN2.5`).
60+
* Note: Modify the `LLM_MODEL` in the `.env` file in order to change the LLM used by ollama. Refer to [Ollama library](https://ollama.com/library) for available models. (Default is `QWEN2.5`).
61+
6262

6363
### Build Docker Container
6464
```bash
@@ -78,12 +78,14 @@ docker compose up -d
7878
### Device Workload Configurations
7979
You can offload model inference to a specific device by modifying the environment variable setting in the docker-compose.yml file.
8080

81-
| Workload | Environment Variable |Supported Device |
82-
|----------------------|----------------------|-------------------------|
83-
| LLM | - | GPU |
84-
| STT | STT_DEVICE | CPU,GPU,NPU |
85-
| TTS | TTS_DEVICE | CPU |
86-
| Lipsync (Wav2lip) | DEVICE | CPU, GPU |
81+
| Workload | Environment Variable |Supported Device |
82+
|--------------------------------|----------------------|-------------------------|
83+
| LLM | - | GPU(D) |
84+
| STT                             | STT_DEVICE           | CPU(D), GPU, NPU        |
85+
| TTS | TTS_DEVICE | CPU(D) |
86+
| Lipsync (Wav2lip/Sadtalker) | DEVICE | CPU(D-wav2lip), GPU(D-sadtalker) |
87+
88+
* Note: (D) = default device
8789

8890
Example Configuration:
8991

@@ -94,7 +96,7 @@ stt_service:
9496
...
9597
environment:
9698
...
97-
STT_DEVICE=CPU
99+
STT_DEVICE=NPU
98100
...
99101
```
100102

usecases/ai/digital-avatar/backend/liveportrait/liveportrait/pretrained_weights/.gitkeep renamed to usecases/ai/digital-avatar/assets/.gitkeep

File renamed without changes.
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
# Copyright(C) 2024 Intel Corporation
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
import os
5+
6+
from basicsr.archs.rrdbnet_arch import RRDBNet
7+
from basicsr.utils.download_util import load_file_from_url
8+
from RealESRGan.realesrgan import RealESRGANer
9+
from RealESRGan.realesrgan.archs.srvgg_arch import SRVGGNetCompact
10+
11+
12+
def initialize(model_name="RealESRGAN_x2plus", device="cpu"):
    """Build a RealESRGANer super-resolution upsampler for the given model.

    Args:
        model_name (str): Key of one of the supported models below.
            Defaults to "RealESRGAN_x2plus".
        device (str): Inference device forwarded to RealESRGANer
            (e.g. "cpu").

    Returns:
        RealESRGANer: A restorer configured with the selected network and
        its locally cached (or freshly downloaded) weights.

    Raises:
        ValueError: If ``model_name`` is not a supported key.
    """
    # Supported models: release URL(s), network definition, and native scale.
    models = {
        "RealESRGAN_x2plus": {
            "url": ["https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth"],
            "name": "RealESRGAN_x2plus",
            "model": RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=2),
            "netscale": 2,
        },
        "RealESRGAN_x4plus": {
            "url": ["https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth"],
            "name": "RealESRGAN_x4plus",
            "model": RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4),
            "netscale": 4,
        },
        "realesr-animevideov3": {
            "url": ["https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-animevideov3.pth"],
            "name": "realesr-animevideov3",
            "model": SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=16, upscale=4, act_type='prelu'),
            "netscale": 4,
        },
        "realesr-general-x4v3": {
            # The second URL is the denoise (wdn) companion weight; it is
            # only relevant when dni_weight is set, which this function
            # never does, so only url[0] is ever downloaded below.
            "url": [
                "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-x4v3.pth",
                "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-wdn-x4v3.pth",
            ],
            # Fix: was mislabelled "realesr-animevideov3".
            "name": "realesr-general-x4v3",
            "model": SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=32, upscale=4, act_type='prelu'),
            "netscale": 4,
        },
        "realesr-general-x4v3-dn": {
            "url": [
                "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-wdn-x4v3.pth",
            ],
            # Fix: was mislabelled "realesr-animevideov3".
            "name": "realesr-general-x4v3-dn",
            "model": SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=32, upscale=4, act_type='prelu'),
            "netscale": 4,
        },
        "RealESRGAN_x4plus_anime_6B": {
            "url": [
                "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth",
            ],
            "name": "RealESRGAN_x4plus_anime_6B",
            "model": RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=6, num_grow_ch=32, scale=4),
            "netscale": 4,
        },
    }

    if model_name not in models:
        raise ValueError(f"Model name {model_name} not found")

    model = models[model_name]

    # Fix: the existence check previously used a cwd-relative 'weights' path
    # while the download landed in <module dir>/weights, causing a needless
    # re-download whenever the process cwd differed from the module directory.
    # Both the check and the download now use the module-relative path.
    ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
    model_path = os.path.join(ROOT_DIR, 'weights', model_name + '.pth')
    if not os.path.isfile(model_path):
        # model_path will be updated to the downloaded file's location
        model_path = load_file_from_url(
            url=model["url"][0], model_dir=os.path.join(ROOT_DIR, 'weights'), progress=True, file_name=None)

    # dni (deep network interpolation) would blend the wdn weights to control
    # denoise strength; intentionally unused here.
    dni_weight = None

    # restorer
    upsampler = RealESRGANer(
        scale=model["netscale"],
        model_path=model_path,
        dni_weight=dni_weight,
        model=model['model'],
        tile=0,        # 0 = no tiling
        tile_pad=10,
        pre_pad=0,
        half=False,    # fp32 inference
        device=device)

    return upsampler
87+
88+
if __name__ == "__main__":
    # Manual smoke test: build the default upsampler end to end.
    initialize()

usecases/ai/digital-avatar/backend/liveportrait/liveportrait/intel_xpu/attention.py renamed to usecases/ai/digital-avatar/backend/RealESRGan/overwrite/intel_xpu/attention.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
# Copyright (C) 2024 Intel Corporation
2+
# SPDX-License-Identifier: Apache-2.0
3+
14
import os
25
import torch
36
from functools import cache

usecases/ai/digital-avatar/backend/liveportrait/liveportrait/intel_xpu/xpu_override.py renamed to usecases/ai/digital-avatar/backend/RealESRGan/overwrite/intel_xpu/xpu_override.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
# Copyright (C) 2024 Intel Corporation
2+
# SPDX-License-Identifier: Apache-2.0
3+
14
import sys
25
import os
36
# os.add_dll_directory(os.path.join(sys.base_exec_prefix))
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Copyright (C) 2024 Intel Corporation
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
# flake8: noqa
5+
from .archs import *
6+
from .data import *
7+
from .models import *
8+
from .utils import *
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Copyright (C) 2024 Intel Corporation
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
import importlib
5+
from basicsr.utils import scandir
6+
from os import path as osp
7+
8+
# automatically scan and import arch modules for registry
9+
# scan all the files that end with '_arch.py' under the archs folder
10+
arch_folder = osp.dirname(osp.abspath(__file__))
11+
arch_filenames = [osp.splitext(osp.basename(v))[0] for v in scandir(arch_folder) if v.endswith('_arch.py')]
12+
# import all the arch modules
13+
_arch_modules = [importlib.import_module(f'RealESRGan.realesrgan.archs.{file_name}') for file_name in arch_filenames]

0 commit comments

Comments
 (0)