Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,6 @@ data/
protos/
utils/
*.pth

.vscode/
*.egg-info/
22 changes: 16 additions & 6 deletions download_model.sh
Original file line number Diff line number Diff line change
@@ -1,9 +1,19 @@
# SyncNet model

mkdir data
wget http://www.robots.ox.ac.uk/~vgg/software/lipsync/data/syncnet_v2.model -O data/syncnet_v2.model
wget http://www.robots.ox.ac.uk/~vgg/software/lipsync/data/example.avi -O data/example.avi
# Abort early unless SYNCNET_MODEL_DIR names a directory: both an unset and an
# empty value would make the mkdir/wget paths below meaningless.
if [ -z "${SYNCNET_MODEL_DIR:-}" ]; then
    echo "SYNCNET_MODEL_DIR is unset or empty" >&2
    exit 1
fi

# For the pre-processing pipeline
mkdir detectors/s3fd/weights
wget https://www.robots.ox.ac.uk/~vgg/software/lipsync/data/sfd_face.pth -O detectors/s3fd/weights/sfd_face.pth
mkdir -p "${SYNCNET_MODEL_DIR}"

# Each model is fetched only if it is not already present. The download goes
# to a ".part" file first and is renamed on success, so an interrupted or
# failed transfer never leaves a truncated file that the existence check
# would accept on the next run.

# SyncNet weights
syncnet_path="${SYNCNET_MODEL_DIR}/syncnet_v2.model"
if [ ! -f "${syncnet_path}" ]; then
    if wget http://www.robots.ox.ac.uk/~vgg/software/lipsync/data/syncnet_v2.model -O "${syncnet_path}.part"; then
        mv "${syncnet_path}.part" "${syncnet_path}"
    else
        echo "Failed to download syncnet_v2.model" >&2
        rm -f "${syncnet_path}.part"
        exit 1
    fi
fi

# S3FD face detector weights (used by the pre-processing pipeline)
sfd_path="${SYNCNET_MODEL_DIR}/sfd_face.pth"
if [ ! -f "${sfd_path}" ]; then
    if wget https://www.robots.ox.ac.uk/~vgg/software/lipsync/data/sfd_face.pth -O "${sfd_path}.part"; then
        mv "${sfd_path}.part" "${sfd_path}"
    else
        echo "Failed to download sfd_face.pth" >&2
        rm -f "${sfd_path}.part"
        exit 1
    fi
fi
21 changes: 21 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
[build-system]
requires = ["setuptools", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "syncnet"
version = "0.1.0"
license = { text = "MIT" }
requires-python = ">=3.7"
dependencies = [
"torch>=1.4.0",
"torchvision>=0.5.0",
"numpy>=1.18.1",
"scipy>=1.2.1",
"scenedetect>=0.6.5.2",
"opencv-contrib-python",
"python_speech_features",
]

[tool.setuptools.packages.find]
where = ["src"]
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ torch>=1.4.0
torchvision>=0.5.0
numpy>=1.18.1
scipy>=1.2.1
scenedetect==0.5.1
scenedetect>=0.6.5.2
opencv-contrib-python
python_speech_features
45 changes: 0 additions & 45 deletions run_syncnet.py

This file was deleted.

32 changes: 16 additions & 16 deletions SyncNetInstance.py → src/syncnet/SyncNetInstance.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@

from scipy import signal
from scipy.io import wavfile
from SyncNetModel import *
from syncnet.SyncNetModel import S
from shutil import rmtree


# ==================== Get OFFSET ====================

def calc_pdist(feat1, feat2, vshift=10):

win_size = vshift*2+1

feat2p = torch.nn.functional.pad(feat2,(0,0,vshift,vshift))
Expand Down Expand Up @@ -52,18 +52,18 @@ def evaluate(self, opt, videofile):

os.makedirs(os.path.join(opt.tmp_dir,opt.reference))

command = ("ffmpeg -y -i %s -threads 1 -f image2 %s" % (videofile,os.path.join(opt.tmp_dir,opt.reference,'%06d.jpg')))
command = ("ffmpeg -y -i %s -threads 1 -f image2 %s" % (videofile,os.path.join(opt.tmp_dir,opt.reference,'%06d.jpg')))
output = subprocess.call(command, shell=True, stdout=None)

command = ("ffmpeg -y -i %s -async 1 -ac 1 -vn -acodec pcm_s16le -ar 16000 %s" % (videofile,os.path.join(opt.tmp_dir,opt.reference,'audio.wav')))
command = ("ffmpeg -y -i %s -async 1 -ac 1 -vn -acodec pcm_s16le -ar 16000 %s" % (videofile,os.path.join(opt.tmp_dir,opt.reference,'audio.wav')))
output = subprocess.call(command, shell=True, stdout=None)

# ========== ==========
# Load video
# Load video
# ========== ==========

images = []

flist = glob.glob(os.path.join(opt.tmp_dir,opt.reference,'*.jpg'))
flist.sort()

Expand Down Expand Up @@ -95,7 +95,7 @@ def evaluate(self, opt, videofile):
print("WARNING: Audio (%.4fs) and video (%.4fs) lengths are different."%(float(len(audio))/16000,float(len(images))/25))

min_length = min(len(images),math.floor(len(audio)/640))

# ========== ==========
# Generate video and audio feats
# ========== ==========
Expand All @@ -106,7 +106,7 @@ def evaluate(self, opt, videofile):

tS = time.time()
for i in range(0,lastframe,opt.batch_size):

im_batch = [ imtv[:,:,vframe:vframe+5,:,:] for vframe in range(i,min(lastframe,i+opt.batch_size)) ]
im_in = torch.cat(im_batch,0)
im_out = self.__S__.forward_lip(im_in.cuda());
Expand All @@ -123,7 +123,7 @@ def evaluate(self, opt, videofile):
# ========== ==========
# Compute offset
# ========== ==========

print('Compute time %.3f sec.' % (time.time()-tS))

dists = calc_pdist(im_feat,cc_feat,vshift=opt.vshift)
Expand All @@ -138,7 +138,7 @@ def evaluate(self, opt, videofile):
# fdist = numpy.pad(fdist, (3,3), 'constant', constant_values=15)
fconf = torch.median(mdist).numpy() - fdist
fconfm = signal.medfilt(fconf,kernel_size=9)

numpy.set_printoptions(formatter={'float': '{: 0.3f}'.format})
print('Framewise conf: ')
print(fconfm)
Expand All @@ -150,9 +150,9 @@ def evaluate(self, opt, videofile):
def extract_feature(self, opt, videofile):

self.__S__.eval();

# ========== ==========
# Load video
# Load video
# ========== ==========
cap = cv2.VideoCapture(videofile)

Expand All @@ -171,7 +171,7 @@ def extract_feature(self, opt, videofile):
im = numpy.transpose(im,(0,3,4,1,2))

imtv = torch.autograd.Variable(torch.from_numpy(im.astype(float)).float())

# ========== ==========
# Generate video feats
# ========== ==========
Expand All @@ -181,7 +181,7 @@ def extract_feature(self, opt, videofile):

tS = time.time()
for i in range(0,lastframe,opt.batch_size):

im_batch = [ imtv[:,:,vframe:vframe+5,:,:] for vframe in range(i,min(lastframe,i+opt.batch_size)) ]
im_in = torch.cat(im_batch,0)
im_out = self.__S__.forward_lipfeat(im_in.cuda());
Expand All @@ -192,7 +192,7 @@ def extract_feature(self, opt, videofile):
# ========== ==========
# Compute offset
# ========== ==========

print('Compute time %.3f sec.' % (time.time()-tS))

return im_feat
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
import numpy as np
import cv2
import torch
from torchvision import transforms
from .nets import S3FDNet
from .box_utils import nms_
import os

PATH_WEIGHT = './detectors/s3fd/weights/sfd_face.pth'
PATH_WEIGHT = os.path.join(os.environ["SYNCNET_MODEL_DIR"], "sfd_face.pth")
img_mean = np.array([104., 117., 123.])[:, np.newaxis, np.newaxis].astype('float32')


Expand All @@ -23,7 +23,7 @@ def __init__(self, device='cuda'):
self.net.load_state_dict(state_dict)
self.net.eval()
print('[S3FD] finished loading (%.4f sec)' % (time.time() - tstamp))

def detect_faces(self, image, conf_th=0.8, scales=[1]):

w, h = image.shape[1], image.shape[0]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def nms_(dets, thresh):
inds = np.where(ovr <= thresh)[0]
order = order[inds + 1]

return np.array(keep).astype(np.int)
return np.array(keep).astype(np.int32)


def decode(loc, priors, variances):
Expand Down
File renamed without changes.
45 changes: 45 additions & 0 deletions src/syncnet/run_all.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import os
import argparse
from syncnet.run_pipeline import run_pipeline
from syncnet.run_syncnet import run_syncnet
from tempfile import TemporaryDirectory
from pathlib import Path


def run_all(video_path: str) -> float:
    """Run the pre-processing pipeline and then SyncNet on one video.

    The SyncNet weights are looked up as ``syncnet_v2.model`` inside the
    directory named by the ``SYNCNET_MODEL_DIR`` environment variable. All
    intermediate files are written to a temporary directory that is removed
    before this function returns.

    Args:
        video_path: Path to the input video file.

    Returns:
        The value returned by ``run_syncnet`` for ``pycrop/00000.avi`` inside
        the temporary working directory.

    Raises:
        KeyError: If ``SYNCNET_MODEL_DIR`` is not set.
        FileNotFoundError: If the model file does not exist, or if
            ``run_pipeline`` did not leave ``pycrop/00000.avi`` behind.
    """
    syncnet_model_path = Path(os.environ["SYNCNET_MODEL_DIR"]) / "syncnet_v2.model"
    # Explicit raise instead of ``assert``: assertions are stripped under
    # ``python -O`` and the check would silently disappear.
    if not syncnet_model_path.exists():
        raise FileNotFoundError(f"SyncNet model not found: {syncnet_model_path}")

    with TemporaryDirectory() as tmp_dir:
        pipeline_opts = argparse.Namespace(
            data_dir=tmp_dir,
            videofile=video_path,
            reference="",
            facedet_scale=0.25,
            crop_scale=0.4,
            min_track=100,
            frame_rate=25,
            num_failed_det=25,
            min_face_size=100,
        )
        run_pipeline(pipeline_opts)

        # With reference="" the cropped output is expected directly under
        # <tmp_dir>/pycrop; only the file numbered 00000 is evaluated.
        processed_video_path = Path(tmp_dir) / "pycrop" / "00000.avi"
        if not processed_video_path.exists():
            raise FileNotFoundError(
                f"Pre-processing produced no cropped video: {processed_video_path}"
            )

        syncnet_opts = argparse.Namespace(
            # Reuse the path validated above rather than rebuilding it.
            initial_model=str(syncnet_model_path),
            batch_size=20,
            vshift=1,
            data_dir=tmp_dir,
            videofile=str(processed_video_path),
            reference="",
        )
        return run_syncnet(syncnet_opts)


if __name__ == "__main__":
    # Command-line entry point: read --video_path and run the whole pipeline
    # on it. The returned value is kept in ``result`` but not printed here.
    parser = argparse.ArgumentParser(description="SyncNet")
    parser.add_argument(
        "--video_path",
        required=True,
        type=str,
        help="Path to the video file",
    )
    args = parser.parse_args()
    result = run_all(video_path=args.video_path)
Loading