Skip to content

Commit 3dd5c47

Browse files
committed
Removed cv2 dependency from face extraction.
1 parent f0d6567 commit 3dd5c47

File tree

3 files changed

+31
-21
lines changed

3 files changed

+31
-21
lines changed

Changelog.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,10 @@ Types of changes:
1919

2020
### Changed
2121

22-
- Switched from old mediapipe 0.8 Facemesh solution to new 0.10 FaceLandmarker
22+
- Switched from old mediapipe 0.8 Facemesh solution to new 0.10 FaceLandmarker.
2323
- Number of landmarks increased from 468 to 478! Output shape of the landmark information numpy file is changed!
24-
24+
- Removed cv2 dependency from face extraction.
25+
- Now OpenCV is needed only for the computation of the motion energy.
2526

2627
## [0.3] - 2025-08-14
2728

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ cd VideoProcessingTools
2525
pip install -r requirements.txt
2626
```
2727

28+
Get the binary models listed under the `models` folder!
29+
2830
## Scripts
2931

3032
Here is the list of scripts and their description.

slvideotools/extract_face_data.py

Lines changed: 26 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,25 @@
1-
import cv2
2-
import numpy as np
1+
# Extracts the face mesh data from the frames of a video using MediaPipe.
2+
# See: https://ai.google.dev/edge/mediapipe/solutions/vision/face_landmarker
33

44
import math
55

6-
from .datagen import create_frame_producer, create_frame_consumer
7-
from .datagen import VideoFrameProducer, VideoFrameConsumer
8-
9-
from typing import List
10-
from typing import Tuple
11-
12-
# Extracts the face mesh data from the frames of a video using MediaPipe.
13-
# See: https://ai.google.dev/edge/mediapipe/solutions/vision/face_landmarker
6+
import numpy as np
147

158
# Code to overlay the face mesh point taken from https://colab.research.google.com/github/googlesamples/mediapipe/blob/main/examples/face_landmarker/python/%5BMediaPipe_Python_Tasks%5D_Face_Landmarker.ipynb
169
import mediapipe as mp
1710
from mediapipe.tasks import python as mp_python
1811
from mediapipe.tasks.python import vision as mp_vision
1912
VisionRunningMode = mp.tasks.vision.RunningMode
2013

14+
from PIL.Image import Image
15+
import PIL.Image
16+
from PIL import ImageDraw
17+
18+
from .datagen import create_frame_producer, create_frame_consumer
19+
from .datagen import VideoFrameProducer, VideoFrameConsumer
20+
21+
from typing import List
22+
from typing import Tuple
2123

2224

2325
MEDIAPIPE_FACE_LANDMARKS_COUNT = 478
@@ -283,18 +285,20 @@ def extract_face_data(frames_in: VideoFrameProducer,
283285
if composite_frames_out is not None:
284286

285287
# Prepare the overlay image
286-
annotated_image = rgb_image.copy()
288+
#annotated_image = rgb_image.copy()
289+
pil_image: Image = PIL.Image.fromarray(obj=rgb_image)
290+
pil_draw = ImageDraw.Draw(pil_image)
291+
#draw.rectangle(xy=[bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]], outline=(220, 10, 10))
292+
287293

288294
# Draw face mesh landmarks on the overlay image.
289295
if landmarks is not None:
290296

291297
# Let's use 1 pixel radius every 500 pixels of video.
292298
norm_landmark_radius = max(1, int(width / 600))
293-
# Set the thickness as the same as the radius.
294-
norm_landmark_thickness = norm_landmark_radius
295299

296300
#
297-
# Draw the landmarks over the face
301+
# Draw the original landmarks over the face
298302
for i, lm in enumerate(orig_frame_lm_list):
299303
lm_x, lm_y, lm_z = lm[:]
300304

@@ -313,8 +317,9 @@ def extract_face_data(frames_in: VideoFrameProducer,
313317
else:
314318
vcol = (20, 20, 220)
315319

316-
cv2.circle(img=annotated_image, center=(int(lm_x), int(lm_y)), radius=norm_landmark_radius,
317-
color=vcol, thickness=norm_landmark_thickness)
320+
pil_draw.ellipse(xy=[lm_x - norm_landmark_radius, lm_y - norm_landmark_radius,
321+
lm_x + norm_landmark_radius, lm_y + norm_landmark_radius],
322+
fill=vcol)
318323

319324
#
320325
# DEBUG: save the landmarks to a file
@@ -323,7 +328,7 @@ def extract_face_data(frames_in: VideoFrameProducer,
323328
# pickle.dump(obj=lm_list, file=outfile)
324329

325330
#
326-
# Draw the landmarks in the upper left corner of the image using a orthographic projection (i.e., use only x and y)
331+
# Draw the (normalized) landmarks in the upper left corner of the image using an orthographic projection (i.e., use only x and y)
327332
# and we use the depth to modulate the color intensity.
328333

329334
# First compute the dynamic range of the z coordinate among all points
@@ -347,11 +352,13 @@ def extract_face_data(frames_in: VideoFrameProducer,
347352
# rescale z in [0,1]
348353
norm_z = 1 - ((lm_z - z_min) / z_range)
349354

350-
cv2.circle(img=annotated_image, center=(int(lm_x), int(lm_y)), radius=norm_landmark_radius,
351-
color=(int(255 * norm_z), 20, 20), thickness=norm_landmark_thickness)
355+
pil_draw.ellipse(xy=[lm_x - norm_landmark_radius, lm_y - norm_landmark_radius,
356+
lm_x + norm_landmark_radius, lm_y + norm_landmark_radius],
357+
fill=(int(255 * norm_z), 20, 20))
352358

353359
#
354360
# Finally, write the annotated frame to the output video
361+
annotated_image = np.asarray(pil_image) # Back from PIL to numpy array
355362
composite_frames_out.consume(annotated_image)
356363

357364
frame_num += 1

0 commit comments

Comments
 (0)