6 changes: 3 additions & 3 deletions camera.bat
@@ -2,7 +2,7 @@
setlocal enabledelayedexpansion

REM Set the default source image path
set "default_src_image=assets\examples\source\s12.jpg"
set "default_src_image=Medusa\redtopG.png"
set "src_image=%default_src_image%"
set "animal_param="
set "paste_back="
@@ -27,6 +27,6 @@ echo use animal: [!animal_param!]
echo paste_back: [!paste_back!]

REM Run the Python command
.\venv\python.exe .\run.py --cfg configs/trt_infer.yaml --realtime --dri_video 0 --src_image !src_image! !animal_param! !paste_back!
.\venv\python.exe .\run.py --cfg configs/trt_infer.yaml --realtime --dri_webcam 0 --src_webcam -1 --src_image !src_image! !animal_param! !paste_back!

endlocal
endlocal
128 changes: 103 additions & 25 deletions run.py
@@ -8,14 +8,14 @@
# video
python run.py \
--src_image assets/examples/driving/d13.mp4 \
--dri_video assets/examples/driving/d11.mp4 \
--dri_webcam assets/examples/driving/d11.mp4 \
--cfg configs/trt_infer.yaml \
--paste_back \
--animal
# pkl
python run.py \
--src_image assets/examples/source/s12.jpg \
--dri_video ./results/2024-09-13-081710/d0.mp4.pkl \
--dri_webcam ./results/2024-09-13-081710/d0.mp4.pkl \
--cfg configs/trt_infer.yaml \
--paste_back \
--animal
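# realtime webcam (same flags as camera.bat in this repo; --dri_webcam takes a webcam index, --src_webcam an optional source-webcam index, -1 to disable)
python run.py \
--src_image assets/examples/source/s12.jpg \
--cfg configs/trt_infer.yaml \
--realtime \
--dri_webcam 0 \
--src_webcam -1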
@@ -47,20 +47,25 @@
def run_with_video(args):
infer_cfg = OmegaConf.load(args.cfg)
infer_cfg.infer_params.flag_pasteback = args.paste_back

# A non-negative --src_webcam index marks the webcam as the initial source (default -1 starts from --src_image)
sourceisWebcam = int(args.src_webcam) >= 0
src_webcam_updateRealtime = False
src_webcam_updatetimer = 0
pipe = FasterLivePortraitPipeline(cfg=infer_cfg, is_animal=args.animal)
ret = pipe.prepare_source(args.src_image, realtime=args.realtime)
if not ret:
print(f"no face in {args.src_image}! exit!")
exit(1)
if not args.dri_video or not os.path.exists(args.dri_video):
if not args.dri_webcam or not os.path.exists(args.dri_webcam):
# read frame from camera if no driving video input
vcap = cv2.VideoCapture(0)
vcap = cv2.VideoCapture(int(args.dri_webcam))
if not vcap.isOpened():
print("no camera found! exit!")
exit(1)
else:
vcap = cv2.VideoCapture(args.dri_video)
vcap = cv2.VideoCapture(args.dri_webcam)
fps = int(vcap.get(cv2.CAP_PROP_FPS))
h, w = pipe.src_imgs[0].shape[:2]
save_dir = f"./results/{datetime.datetime.now().strftime('%Y-%m-%d-%H%M%S')}"
@@ -70,10 +75,10 @@ def run_with_video(args):
if not args.realtime:
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
vsave_crop_path = os.path.join(save_dir,
f"{os.path.basename(args.src_image)}-{os.path.basename(args.dri_video)}-crop.mp4")
f"{os.path.basename(args.src_image)}-{os.path.basename(args.dri_webcam)}-crop.mp4")
vout_crop = cv2.VideoWriter(vsave_crop_path, fourcc, fps, (512 * 2, 512))
vsave_org_path = os.path.join(save_dir,
f"{os.path.basename(args.src_image)}-{os.path.basename(args.dri_video)}-org.mp4")
f"{os.path.basename(args.src_image)}-{os.path.basename(args.dri_webcam)}-org.mp4")
vout_org = cv2.VideoWriter(vsave_org_path, fourcc, fps, (w, h))

infer_times = []
@@ -111,28 +116,99 @@
else:
if infer_cfg.infer_params.flag_pasteback:
out_org = cv2.cvtColor(out_org, cv2.COLOR_RGB2BGR)
cv2.imshow('Render', out_org)
cv2.imshow('Render, Q > exit, S > Stitching, Z > RelativeMotion, X > AnimationRegion, C > CropDrivingVideo, Space > WebcamAsSource, R > SwitchRealtimeWebcamUpdate', out_org)
else:
# image show in realtime mode
cv2.imshow('Render', out_crop)
# Press 'q' to exit the loop
if cv2.waitKey(1) & 0xFF == ord('q'):
cv2.imshow('Render, Q > exit, S > Stitching, Z > RelativeMotion, X > AnimationRegion, C > CropDrivingVideo, Space > WebcamAsSource, R > SwitchRealtimeWebcamUpdate', out_crop)
# 'q' exits the loop, 'r' toggles realtime src_webcam updates, spacebar toggles the webcam as the source
k = cv2.waitKey(1) & 0xFF
if k == ord('q'):
break
# Keys for toggling inference parameters
if k == ord('s'):
infer_cfg.infer_params.flag_stitching = not infer_cfg.infer_params.flag_stitching
print('flag_stitching:'+str(infer_cfg.infer_params.flag_stitching))
if k == ord('z'):
infer_cfg.infer_params.flag_relative_motion = not infer_cfg.infer_params.flag_relative_motion
print('flag_relative_motion:'+str(infer_cfg.infer_params.flag_relative_motion))
if k == ord('x'):
infer_cfg.infer_params.animation_region = "exp" if infer_cfg.infer_params.animation_region == "all" else "all"
print('animation_region:' + str(infer_cfg.infer_params.animation_region))
if k == ord('c'):
infer_cfg.infer_params.flag_crop_driving_video = not infer_cfg.infer_params.flag_crop_driving_video
print('flag_crop_driving_video:'+str(infer_cfg.infer_params.flag_crop_driving_video))
if k == ord('v'):
infer_cfg.infer_params.flag_pasteback = not infer_cfg.infer_params.flag_pasteback
print('flag_pasteback:'+str(infer_cfg.infer_params.flag_pasteback))

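# Retargeting / editing toggles: a = normalize lip, d = source-video eye retargeting, f = video-editing head rotation, g = eye retargeting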
if k == ord('a'):
infer_cfg.infer_params.flag_normalize_lip = not infer_cfg.infer_params.flag_normalize_lip
print('flag_normalize_lip:'+str(infer_cfg.infer_params.flag_normalize_lip))
if k == ord('d'):
infer_cfg.infer_params.flag_source_video_eye_retargeting = not infer_cfg.infer_params.flag_source_video_eye_retargeting
print('flag_source_video_eye_retargeting:'+str(infer_cfg.infer_params.flag_source_video_eye_retargeting))
if k == ord('f'):
infer_cfg.infer_params.flag_video_editing_head_rotation = not infer_cfg.infer_params.flag_video_editing_head_rotation
print('flag_video_editing_head_rotation:'+str(infer_cfg.infer_params.flag_video_editing_head_rotation))
if k == ord('g'):
infer_cfg.infer_params.flag_eye_retargeting = not infer_cfg.infer_params.flag_eye_retargeting
print('flag_eye_retargeting:'+str(infer_cfg.infer_params.flag_eye_retargeting))

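# Crop-scale tweaks: k/l decrease/increase src_scale (the source is re-prepared so it takes effect), n/m adjust dri_scale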
if k == ord('k'):
infer_cfg.crop_params.src_scale -= 0.1
ret = pipe.prepare_source(args.src_image, realtime=args.realtime)
print('src_scale:'+str(infer_cfg.crop_params.src_scale))
if k == ord('l'):
infer_cfg.crop_params.src_scale += 0.1
ret = pipe.prepare_source(args.src_image, realtime=args.realtime)
print('src_scale:'+str(infer_cfg.crop_params.src_scale))
if k == ord('n'):
infer_cfg.crop_params.dri_scale -= 0.1
print('dri_scale:'+str(infer_cfg.crop_params.dri_scale))
if k == ord('m'):
infer_cfg.crop_params.dri_scale += 0.1
print('dri_scale:'+str(infer_cfg.crop_params.dri_scale))

#pipe.update_cfg(infer_cfg)
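# r toggles per-frame refresh of the webcam source; spacebar switches the source between the image file and the webcam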
elif k == ord('r'):
src_webcam_updateRealtime = not src_webcam_updateRealtime
print('src_webcam_updateRealtime:' + str(src_webcam_updateRealtime))
elif k == 32:
sourceisWebcam = not sourceisWebcam
print('Switch Source Type, src_image file:',args.src_image, 'src_webcam index:',args.src_webcam)
print(sourceisWebcam)
#Reverting source to image
if not sourceisWebcam:
ret = pipe.prepare_source(args.src_image, realtime=args.realtime)
#Updating src_webcam frame
if sourceisWebcam and int(args.src_webcam)>-1:
# Realtime update: refresh the webcam source every frame (otherwise the timer below refreshes it roughly every 60 iterations)
if src_webcam_updateRealtime:
ret = pipe.prepare_source(args.src_image, realtime=args.realtime, srcwebcamindex=int(args.src_webcam))
else:
if src_webcam_updatetimer <= 0:
ret = pipe.prepare_source(args.src_image, realtime=args.realtime, srcwebcamindex=int(args.src_webcam))
src_webcam_updatetimer = 60
src_webcam_updatetimer -= 1




vcap.release()
if not args.realtime:
vout_crop.release()
vout_org.release()
if video_has_audio(args.dri_video):
if video_has_audio(args.dri_webcam):
vsave_crop_path_new = os.path.splitext(vsave_crop_path)[0] + "-audio.mp4"
subprocess.call(
[FFMPEG, "-i", vsave_crop_path, "-i", args.dri_video,
[FFMPEG, "-i", vsave_crop_path, "-i", args.dri_webcam,
"-b:v", "10M", "-c:v",
"libx264", "-map", "0:v", "-map", "1:a",
"-c:a", "aac",
"-pix_fmt", "yuv420p", vsave_crop_path_new, "-y", "-shortest"])
vsave_org_path_new = os.path.splitext(vsave_org_path)[0] + "-audio.mp4"
subprocess.call(
[FFMPEG, "-i", vsave_org_path, "-i", args.dri_video,
[FFMPEG, "-i", vsave_org_path, "-i", args.dri_webcam,
"-b:v", "10M", "-c:v",
"libx264", "-map", "0:v", "-map", "1:a",
"-c:a", "aac",
@@ -158,7 +234,7 @@ def run_with_video(args):
'c_lip_lst': c_lip_lst,
}
template_pkl_path = os.path.join(save_dir,
f"{os.path.basename(args.dri_video)}.pkl")
f"{os.path.basename(args.dri_webcam)}.pkl")
with open(template_pkl_path, "wb") as fw:
pickle.dump(template_dct, fw)
print(f"save driving motion pkl file at : {template_pkl_path}")
@@ -173,7 +249,7 @@ def run_with_pkl(args):
if not ret:
print(f"no face in {args.src_image}! exit!")
return
with open(args.dri_video, "rb") as fin:
with open(args.dri_webcam, "rb") as fin:
dri_motion_infos = pickle.load(fin)

fps = int(dri_motion_infos["output_fps"])
@@ -185,10 +261,10 @@ def run_with_pkl(args):
if not args.realtime:
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
vsave_crop_path = os.path.join(save_dir,
f"{os.path.basename(args.src_image)}-{os.path.basename(args.dri_video)}-crop.mp4")
f"{os.path.basename(args.src_image)}-{os.path.basename(args.dri_webcam)}-crop.mp4")
vout_crop = cv2.VideoWriter(vsave_crop_path, fourcc, fps, (512, 512))
vsave_org_path = os.path.join(save_dir,
f"{os.path.basename(args.src_image)}-{os.path.basename(args.dri_video)}-org.mp4")
f"{os.path.basename(args.src_image)}-{os.path.basename(args.dri_webcam)}-org.mp4")
vout_org = cv2.VideoWriter(vsave_org_path, fourcc, fps, (w, h))

infer_times = []
@@ -229,17 +305,17 @@ def run_with_pkl(args):
if not args.realtime:
vout_crop.release()
vout_org.release()
if video_has_audio(args.dri_video):
if video_has_audio(args.dri_webcam):
vsave_crop_path_new = os.path.splitext(vsave_crop_path)[0] + "-audio.mp4"
subprocess.call(
[FFMPEG, "-i", vsave_crop_path, "-i", args.dri_video,
[FFMPEG, "-i", vsave_crop_path, "-i", args.dri_webcam,
"-b:v", "10M", "-c:v",
"libx264", "-map", "0:v", "-map", "1:a",
"-c:a", "aac",
"-pix_fmt", "yuv420p", vsave_crop_path_new, "-y", "-shortest"])
vsave_org_path_new = os.path.splitext(vsave_org_path)[0] + "-audio.mp4"
subprocess.call(
[FFMPEG, "-i", vsave_org_path, "-i", args.dri_video,
[FFMPEG, "-i", vsave_org_path, "-i", args.dri_webcam,
"-b:v", "10M", "-c:v",
"libx264", "-map", "0:v", "-map", "1:a",
"-c:a", "aac",
@@ -262,15 +338,17 @@ def run_with_pkl(args):
parser = argparse.ArgumentParser(description='Faster Live Portrait Pipeline')
parser.add_argument('--src_image', required=False, type=str, default="assets/examples/source/s12.jpg",
help='source image')
parser.add_argument('--dri_video', required=False, type=str, default="assets/examples/driving/d14.mp4",
help='driving video')
parser.add_argument('--src_webcam', required=False, type=int, default=-1,
help='source webcam index (-1 disables the webcam source)')
parser.add_argument('--dri_webcam', required=False, type=str, default="assets/examples/driving/d14.mp4",
help='driving video path, motion .pkl file, or webcam index')
parser.add_argument('--cfg', required=False, type=str, default="configs/onnx_infer.yaml", help='inference config')
parser.add_argument('--realtime', action='store_true', help='realtime inference')
parser.add_argument('--animal', action='store_true', help='use animal model')
parser.add_argument('--paste_back', action='store_true', default=False, help='paste back to origin image')
args, unknown = parser.parse_known_args()

if args.dri_video.endswith(".pkl"):
if args.dri_webcam.endswith(".pkl"):
run_with_pkl(args)
else:
run_with_video(args)
42 changes: 25 additions & 17 deletions src/pipelines/faster_live_portrait_pipeline.py
@@ -125,25 +125,31 @@ def calc_combined_lip_ratio(self, c_d_lip_i, source_lmk):
return combined_lip_ratio_tensor

def prepare_source(self, source_path, **kwargs):
print(f"process source:{source_path} >>>>>>>>")
try:
if utils.is_video(source_path):
self.is_source_video = True
else:
self.is_source_video = False

if self.is_source_video:
src_imgs_bgr = []
src_vcap = cv2.VideoCapture(source_path)
while True:
ret, frame = src_vcap.read()
if not ret:
break
src_imgs_bgr.append(frame)
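# If a 'srcwebcamindex' kwarg is passed, grab a single frame from that webcam as the source instead of reading source_path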
if 'srcwebcamindex' in kwargs:
src_vcap = cv2.VideoCapture(kwargs.get('srcwebcamindex'))
_, frame = src_vcap.read()
src_imgs_bgr = [frame]
src_vcap.release()
else:
img_bgr = cv2.imread(source_path, cv2.IMREAD_COLOR)
src_imgs_bgr = [img_bgr]
print(f"process source:{source_path} >>>>>>>>")
if utils.is_video(source_path):
self.is_source_video = True
else:
self.is_source_video = False

if self.is_source_video:
src_imgs_bgr = []
src_vcap = cv2.VideoCapture(source_path)
while True:
ret, frame = src_vcap.read()
if not ret:
break
src_imgs_bgr.append(frame)
src_vcap.release()
else:
img_bgr = cv2.imread(source_path, cv2.IMREAD_COLOR)
src_imgs_bgr = [img_bgr]

self.src_imgs = []
self.src_infos = []
@@ -261,7 +267,9 @@ def prepare_source(self, source_path, **kwargs):
M = torch.from_numpy(crop_info['M_c2o']).to(self.device)
src_infos[i].append(M)
self.src_infos.append(src_infos[:])
print(f"finish process source:{source_path} >>>>>>>>")
# Avoid spamming the console when the source is refreshed from a webcam every frame
if 'srcwebcamindex' not in kwargs:
print(f"finish process source:{source_path} >>>>>>>>")
return len(self.src_infos) > 0
except Exception as e:
traceback.print_exc()