carykh · nivkner · Jan 1, 2019 · Jan 1, 2019 · Jan 1, 2019
diff --git a/README.md b/README.md
@@ -1,4 +1,17 @@
 # videoToVoice
+
+To install the dependencies with pip, use the requirements.txt file:
+
+```
+pip install -r requirements.txt
+```
+
+TensorFlow might fail to install if you are using an incompatible version of Python (Python 3.7 as of the time of writing),
+in which case you might have to install it manually.
+
+The scripts also depend on ffmpeg being installed for video and audio conversions.
+
+
 These files take in a sequence of lip images, and predict the phonemes being said.
 
 pyTubeTest.py takes in a YouTube URL, downloads that video onto the computer, turns the video into an image sequence, tries to find faces in the images, and also extracts the audio from the video and saves that, too. Earlier, we tried to get pyTubeTest.py to also convert the audio into spectrograms with ARSS in the same code, but that just didn’t work because all the libraries required for the first steps only work in Ubuntu, and ARSS only works in Windows.

diff --git a/lipTester.py b/lipTester.py
@@ -1,4 +1,5 @@
 import face_recognition
+from pathlib import Path
 from scipy import misc
 margin = 25
 maxWidth = 0
@@ -13,7 +14,8 @@
     while len(strIndex) < 4:
         strIndex = "0"+strIndex
 
-    image = face_recognition.load_image_file("/media/rob/Ma Book1/CS 230/videoToVoice/3/origImages/frame"+strIndex+".jpg")
+    image_file = Path("3/origImages/frame"+strIndex+".jpg").resolve()
+    image = face_recognition.load_image_file(str(image_file))
     face_landmarks_list = face_recognition.face_landmarks(image)
 
     if(len(face_landmarks_list) >= 1):
@@ -41,5 +43,6 @@
             maxWidth = xMax-xMin
 
         arr = misc.imread("3/origImages/frame"+strIndex+".jpg")
+        Path("3/mouthImages").mkdir(exist_ok=True)
         misc.imsave("3/mouthImages/frame"+strIndex+".jpg",arr[yMin-margin:yMax+margin,xMin-margin:xMax+margin])
         print("FINISHED IMAGE #"+str(i)+". Also, the maximum dimensions are "+str(maxWidth)+" x "+str(maxHeight))
diff --git a/pytubeTest.py b/pytubeTest.py
@@ -47,6 +47,4 @@
             faceFrame = frame.crop((left,top,right,bottom-height*0.3))
             faceFrame.save(faceFilename)
 
-command = "ffmpeg -i "+latest_file+" -ab 160k -ac 2 -ar 44100 -vn "+folderNumber+"/audio.wav"
-
-subprocess.call(command, shell=True)
+subprocess.run(["ffmpeg", "-i", latest_file, "-ab", "160k", "-ac", "2", "-ar", "44100", "-vn", folderNumber+"/audio.wav"])
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,5 @@
+VideoSequence
+face_recognition
+pytube
+scipy<=1.0.0
+tensorflow