diff --git a/camera.bat b/camera.bat index 81400ab..110912f 100644 --- a/camera.bat +++ b/camera.bat @@ -2,7 +2,7 @@ setlocal enabledelayedexpansion REM 设置默认源图像路径 -set "default_src_image=assets\examples\source\s12.jpg" +set "default_src_image=Medusa\redtopG.png" set "src_image=%default_src_image%" set "animal_param=" set "paste_back=" @@ -27,6 +27,6 @@ echo use animal: [!animal_param!] echo paste_back: [!paste_back!] REM 执行Python命令 -.\venv\python.exe .\run.py --cfg configs/trt_infer.yaml --realtime --dri_video 0 --src_image !src_image! !animal_param! !paste_back! +.\venv\python.exe .\run.py --cfg configs/trt_infer.yaml --realtime --dri_webcam 0 --src_webcam -1 --src_image !src_image! !animal_param! !paste_back! -endlocal \ No newline at end of file +endlocal diff --git a/run.py b/run.py index 64c7379..21b5b45 100644 --- a/run.py +++ b/run.py @@ -8,14 +8,14 @@ # video python run.py \ --src_image assets/examples/driving/d13.mp4 \ - --dri_video assets/examples/driving/d11.mp4 \ + --dri_webcam assets/examples/driving/d11.mp4 \ --cfg configs/trt_infer.yaml \ --paste_back \ --animal # pkl python run.py \ --src_image assets/examples/source/s12.jpg \ - --dri_video ./results/2024-09-13-081710/d0.mp4.pkl \ + --dri_webcam ./results/2024-09-13-081710/d0.mp4.pkl \ --cfg configs/trt_infer.yaml \ --paste_back \ --animal @@ -47,20 +47,25 @@ def run_with_video(args): infer_cfg = OmegaConf.load(args.cfg) infer_cfg.infer_params.flag_pasteback = args.paste_back - + if int(args.src_webcam)>0: + sourceisWebcam = True + else: + sourceisWebcam = False + src_webcam_updateRealtime = False + src_webcam_updatetimer = 0 pipe = FasterLivePortraitPipeline(cfg=infer_cfg, is_animal=args.animal) ret = pipe.prepare_source(args.src_image, realtime=args.realtime) if not ret: print(f"no face in {args.src_image}! exit!") exit(1) - if not args.dri_video or not os.path.exists(args.dri_video): + if not args.dri_webcam or not os.path.exists(args.dri_webcam): # read frame from camera if no driving video input - vcap = cv2.VideoCapture(0) + vcap = cv2.VideoCapture(int(args.dri_webcam)) if not vcap.isOpened(): print("no camera found! exit!") exit(1) else: - vcap = cv2.VideoCapture(args.dri_video) + vcap = cv2.VideoCapture(args.dri_webcam) fps = int(vcap.get(cv2.CAP_PROP_FPS)) h, w = pipe.src_imgs[0].shape[:2] save_dir = f"./results/{datetime.datetime.now().strftime('%Y-%m-%d-%H%M%S')}" @@ -70,10 +75,10 @@ def run_with_video(args): if not args.realtime: fourcc = cv2.VideoWriter_fourcc(*'mp4v') vsave_crop_path = os.path.join(save_dir, - f"{os.path.basename(args.src_image)}-{os.path.basename(args.dri_video)}-crop.mp4") + f"{os.path.basename(args.src_image)}-{os.path.basename(args.dri_webcam)}-crop.mp4") vout_crop = cv2.VideoWriter(vsave_crop_path, fourcc, fps, (512 * 2, 512)) vsave_org_path = os.path.join(save_dir, - f"{os.path.basename(args.src_image)}-{os.path.basename(args.dri_video)}-org.mp4") + f"{os.path.basename(args.src_image)}-{os.path.basename(args.dri_webcam)}-org.mp4") vout_org = cv2.VideoWriter(vsave_org_path, fourcc, fps, (w, h)) infer_times = [] @@ -111,28 +116,99 @@ def run_with_video(args): else: if infer_cfg.infer_params.flag_pasteback: out_org = cv2.cvtColor(out_org, cv2.COLOR_RGB2BGR) - cv2.imshow('Render', out_org) + cv2.imshow('Render, Q > exit, S > Stitching, Z > RelativeMotion, X > AnimationRegion, C > CropDrivingVideo, Space > Webcamassource, R > SwitchRealtimeWebcamUpdate',out_org) else: # image show in realtime mode - cv2.imshow('Render', out_crop) - # 按下'q'键退出循环 - if cv2.waitKey(1) & 0xFF == ord('q'): + cv2.imshow('Render, Q > exit, S > Stitching, Z > RelativeMotion, X > AnimationRegion, C > CropDrivingVideo, Space > Webcamassource, R > SwitchRealtimeWebcamUpdate', out_crop) + # Press the 'q' key to exit the loop, r to switch realtime src_webcam update, spacebar to switch sourceisWebcam + k = cv2.waitKey(1) & 0xFF + if k == ord('q'): break + # Key for Interesting Params + if k == ord('s'): + infer_cfg.infer_params.flag_stitching = not infer_cfg.infer_params.flag_stitching + print('flag_stitching:'+str(infer_cfg.infer_params.flag_stitching)) + if k == ord('z'): + infer_cfg.infer_params.flag_relative_motion = not infer_cfg.infer_params.flag_relative_motion + print('flag_relative_motion:'+str(infer_cfg.infer_params.flag_relative_motion)) + if k == ord('x'): + if infer_cfg.infer_params.animation_region == "all": infer_cfg.infer_params.animation_region = "exp", print('animation_region = "exp"') + else:infer_cfg.infer_params.animation_region = "all", print('animation_region = "all"') + if k == ord('c'): + infer_cfg.infer_params.flag_crop_driving_video = not infer_cfg.infer_params.flag_crop_driving_video + print('flag_crop_driving_video:'+str(infer_cfg.infer_params.flag_crop_driving_video)) + if k == ord('v'): + infer_cfg.infer_params.flag_pasteback = not infer_cfg.infer_params.flag_pasteback + print('flag_pasteback:'+str(infer_cfg.infer_params.flag_pasteback)) + + if k == ord('a'): + infer_cfg.infer_params.flag_normalize_lip = not infer_cfg.infer_params.flag_normalize_lip + print('flag_normalize_lip:'+str(infer_cfg.infer_params.flag_normalize_lip)) + if k == ord('d'): + infer_cfg.infer_params.flag_source_video_eye_retargeting = not infer_cfg.infer_params.flag_source_video_eye_retargeting + print('flag_source_video_eye_retargeting:'+str(infer_cfg.infer_params.flag_source_video_eye_retargeting)) + if k == ord('f'): + infer_cfg.infer_params.flag_video_editing_head_rotation = not infer_cfg.infer_params.flag_video_editing_head_rotation + print('flag_video_editing_head_rotation:'+str(infer_cfg.infer_params.flag_video_editing_head_rotation)) + if k == ord('g'): + infer_cfg.infer_params.flag_eye_retargeting = not infer_cfg.infer_params.flag_eye_retargeting + print('flag_eye_retargeting:'+str(infer_cfg.infer_params.flag_eye_retargeting)) + + if k == ord('k'): + infer_cfg.crop_params.src_scale -= 0.1 + ret = pipe.prepare_source(args.src_image, realtime=args.realtime) + print('src_scale:'+str(infer_cfg.crop_params.src_scale)) + if k == ord('l'): + infer_cfg.crop_params.src_scale += 0.1 + ret = pipe.prepare_source(args.src_image, realtime=args.realtime) + print('src_scale:'+str(infer_cfg.crop_params.src_scale)) + if k == ord('n'): + infer_cfg.crop_params.dri_scale -= 0.1 + print('dri_scale:'+str(infer_cfg.crop_params.dri_scale)) + if k == ord('m'): + infer_cfg.crop_params.dri_scale += 0.1 + print('dri_scale:'+str(infer_cfg.crop_params.dri_scale)) + + #pipe.update_cfg(infer_cfg) + elif k == ord('r'): + src_webcam_updateRealtime = not src_webcam_updateRealtime + print('Switching realtime update of source webcam ') + elif k == 32: + sourceisWebcam = not sourceisWebcam + print('Switch Source Type, src_image file:',args.src_image, 'src_webcam index:',args.src_webcam) + print(sourceisWebcam) + #Reverting source to image + if not sourceisWebcam: + ret = pipe.prepare_source(args.src_image, realtime=args.realtime) + #Updating src_webcam frame + if sourceisWebcam and int(args.src_webcam)>-1: + #Realime Update + if src_webcam_updateRealtime: + ret = pipe.prepare_source(args.src_image, realtime=args.realtime, srcwebcamindex=int(args.src_webcam)) + else: + if src_webcam_updatetimer <= 0: + ret = pipe.prepare_source(args.src_image, realtime=args.realtime, srcwebcamindex=int(args.src_webcam)) + src_webcam_updatetimer = 60 + src_webcam_updatetimer -= 1 + + + + vcap.release() if not args.realtime: vout_crop.release() vout_org.release() - if video_has_audio(args.dri_video): + if video_has_audio(args.dri_webcam): vsave_crop_path_new = os.path.splitext(vsave_crop_path)[0] + "-audio.mp4" subprocess.call( - [FFMPEG, "-i", vsave_crop_path, "-i", args.dri_video, + [FFMPEG, "-i", vsave_crop_path, "-i", args.dri_webcam, "-b:v", "10M", "-c:v", "libx264", "-map", "0:v", "-map", "1:a", "-c:a", "aac", "-pix_fmt", "yuv420p", vsave_crop_path_new, "-y", "-shortest"]) vsave_org_path_new = os.path.splitext(vsave_org_path)[0] + "-audio.mp4" subprocess.call( - [FFMPEG, "-i", vsave_org_path, "-i", args.dri_video, + [FFMPEG, "-i", vsave_org_path, "-i", args.dri_webcam, "-b:v", "10M", "-c:v", "libx264", "-map", "0:v", "-map", "1:a", "-c:a", "aac", @@ -158,7 +234,7 @@ def run_with_video(args): 'c_lip_lst': c_lip_lst, } template_pkl_path = os.path.join(save_dir, - f"{os.path.basename(args.dri_video)}.pkl") + f"{os.path.basename(args.dri_webcam)}.pkl") with open(template_pkl_path, "wb") as fw: pickle.dump(template_dct, fw) print(f"save driving motion pkl file at : {template_pkl_path}") @@ -173,7 +249,7 @@ def run_with_pkl(args): if not ret: print(f"no face in {args.src_image}! exit!") return - with open(args.dri_video, "rb") as fin: + with open(args.dri_webcam, "rb") as fin: dri_motion_infos = pickle.load(fin) fps = int(dri_motion_infos["output_fps"]) @@ -185,10 +261,10 @@ def run_with_pkl(args): if not args.realtime: fourcc = cv2.VideoWriter_fourcc(*'mp4v') vsave_crop_path = os.path.join(save_dir, - f"{os.path.basename(args.src_image)}-{os.path.basename(args.dri_video)}-crop.mp4") + f"{os.path.basename(args.src_image)}-{os.path.basename(args.dri_webcam)}-crop.mp4") vout_crop = cv2.VideoWriter(vsave_crop_path, fourcc, fps, (512, 512)) vsave_org_path = os.path.join(save_dir, - f"{os.path.basename(args.src_image)}-{os.path.basename(args.dri_video)}-org.mp4") + f"{os.path.basename(args.src_image)}-{os.path.basename(args.dri_webcam)}-org.mp4") vout_org = cv2.VideoWriter(vsave_org_path, fourcc, fps, (w, h)) infer_times = [] @@ -229,17 +305,17 @@ def run_with_pkl(args): if not args.realtime: vout_crop.release() vout_org.release() - if video_has_audio(args.dri_video): + if video_has_audio(args.dri_webcam): vsave_crop_path_new = os.path.splitext(vsave_crop_path)[0] + "-audio.mp4" subprocess.call( - [FFMPEG, "-i", vsave_crop_path, "-i", args.dri_video, + [FFMPEG, "-i", vsave_crop_path, "-i", args.dri_webcam, "-b:v", "10M", "-c:v", "libx264", "-map", "0:v", "-map", "1:a", "-c:a", "aac", "-pix_fmt", "yuv420p", vsave_crop_path_new, "-y", "-shortest"]) vsave_org_path_new = os.path.splitext(vsave_org_path)[0] + "-audio.mp4" subprocess.call( - [FFMPEG, "-i", vsave_org_path, "-i", args.dri_video, + [FFMPEG, "-i", vsave_org_path, "-i", args.dri_webcam, "-b:v", "10M", "-c:v", "libx264", "-map", "0:v", "-map", "1:a", "-c:a", "aac", @@ -262,15 +338,17 @@ def run_with_pkl(args): parser = argparse.ArgumentParser(description='Faster Live Portrait Pipeline') parser.add_argument('--src_image', required=False, type=str, default="assets/examples/source/s12.jpg", help='source image') - parser.add_argument('--dri_video', required=False, type=str, default="assets/examples/driving/d14.mp4", - help='driving video') + parser.add_argument('--src_webcam', required=False, type=int, default=-1, + help='source webcam') + parser.add_argument('--dri_webcam', required=False, type=str, default="assets/examples/driving/d14.mp4", + help='driving video') parser.add_argument('--cfg', required=False, type=str, default="configs/onnx_infer.yaml", help='inference config') parser.add_argument('--realtime', action='store_true', help='realtime inference') parser.add_argument('--animal', action='store_true', help='use animal model') parser.add_argument('--paste_back', action='store_true', default=False, help='paste back to origin image') args, unknown = parser.parse_known_args() - if args.dri_video.endswith(".pkl"): + if args.dri_webcam.endswith(".pkl"): run_with_pkl(args) else: run_with_video(args) diff --git a/src/pipelines/faster_live_portrait_pipeline.py b/src/pipelines/faster_live_portrait_pipeline.py index 9031fc3..e0f31fb 100644 --- a/src/pipelines/faster_live_portrait_pipeline.py +++ b/src/pipelines/faster_live_portrait_pipeline.py @@ -125,25 +125,31 @@ def calc_combined_lip_ratio(self, c_d_lip_i, source_lmk): return combined_lip_ratio_tensor def prepare_source(self, source_path, **kwargs): - print(f"process source:{source_path} >>>>>>>>") try: - if utils.is_video(source_path): - self.is_source_video = True - else: - self.is_source_video = False - - if self.is_source_video: - src_imgs_bgr = [] - src_vcap = cv2.VideoCapture(source_path) - while True: - ret, frame = src_vcap.read() - if not ret: - break - src_imgs_bgr.append(frame) + if 'srcwebcamindex' in kwargs: + src_vcap = cv2.VideoCapture(kwargs.get('srcwebcamindex')) + _, frame = src_vcap.read() + src_imgs_bgr = [frame] src_vcap.release() else: - img_bgr = cv2.imread(source_path, cv2.IMREAD_COLOR) - src_imgs_bgr = [img_bgr] + print(f"process source:{source_path} >>>>>>>>") + if utils.is_video(source_path): + self.is_source_video = True + else: + self.is_source_video = False + + if self.is_source_video: + src_imgs_bgr = [] + src_vcap = cv2.VideoCapture(source_path) + while True: + ret, frame = src_vcap.read() + if not ret: + break + src_imgs_bgr.append(frame) + src_vcap.release() + else: + img_bgr = cv2.imread(source_path, cv2.IMREAD_COLOR) + src_imgs_bgr = [img_bgr] self.src_imgs = [] self.src_infos = [] @@ -261,7 +267,9 @@ def prepare_source(self, source_path, **kwargs): M = torch.from_numpy(crop_info['M_c2o']).to(self.device) src_infos[i].append(M) self.src_infos.append(src_infos[:]) - print(f"finish process source:{source_path} >>>>>>>>") + #to not spam + if not 'srcwebcamindex' in kwargs: + print(f"finish process source:{source_path} >>>>>>>>") return len(self.src_infos) > 0 except Exception as e: traceback.print_exc()