- import cv2
- import numpy as np
+ # Extracts the face mesh data from the frames of a video using MediaPipe.
+ # See: https://ai.google.dev/edge/mediapipe/solutions/vision/face_landmarker

import math

- from .datagen import create_frame_producer, create_frame_consumer
- from .datagen import VideoFrameProducer, VideoFrameConsumer
-
- from typing import List
- from typing import Tuple
-
- # Extracts the face mesh data from the frames of a video using MediaPipe.
- # See: https://ai.google.dev/edge/mediapipe/solutions/vision/face_landmarker
+ import numpy as np

# Code to overlay the face mesh points taken from https://colab.research.google.com/github/googlesamples/mediapipe/blob/main/examples/face_landmarker/python/%5BMediaPipe_Python_Tasks%5D_Face_Landmarker.ipynb
import mediapipe as mp
from mediapipe.tasks import python as mp_python
from mediapipe.tasks.python import vision as mp_vision
VisionRunningMode = mp.tasks.vision.RunningMode

+ from PIL.Image import Image
+ import PIL.Image
+ from PIL import ImageDraw
+
+ from .datagen import create_frame_producer, create_frame_consumer
+ from .datagen import VideoFrameProducer, VideoFrameConsumer
+
+ from typing import List
+ from typing import Tuple


MEDIAPIPE_FACE_LANDMARKS_COUNT = 478
@@ -283,18 +285,20 @@ def extract_face_data(frames_in: VideoFrameProducer,
        if composite_frames_out is not None:

            # Prepare the overlay image
-             annotated_image = rgb_image.copy()
+             #annotated_image = rgb_image.copy()
+             pil_image: Image = PIL.Image.fromarray(obj=rgb_image)
+             pil_draw = ImageDraw.Draw(pil_image)
+             #draw.rectangle(xy=[bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]], outline=(220, 10, 10))
+

            # Draw face mesh landmarks on the overlay image.
            if landmarks is not None:

                # Let's use 1 pixel of radius every 600 pixels of video width.
                norm_landmark_radius = max(1, int(width / 600))
-                 # Set the thickness as the same as the radius.
-                 norm_landmark_thickness = norm_landmark_radius

                #
-                 # Draw the landmarks over the face
+                 # Draw the original landmarks over the face
                for i, lm in enumerate(orig_frame_lm_list):
                    lm_x, lm_y, lm_z = lm[:]

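The hunk above replaces the OpenCV-based overlay with Pillow: the RGB frame (a numpy array) is wrapped in a PIL image, drawn on through an ImageDraw context, and converted back at the end of the loop. A minimal self-contained sketch of that round-trip, assuming an HxWx3 uint8 RGB frame (the zero array here is only a stand-in for a real decoded frame):

    import numpy as np
    import PIL.Image
    from PIL import ImageDraw

    rgb_image = np.zeros((480, 640, 3), dtype=np.uint8)   # stand-in for a decoded video frame

    pil_image = PIL.Image.fromarray(rgb_image)    # wrap the numpy array as a PIL image
    pil_draw = ImageDraw.Draw(pil_image)          # drawing context bound to that image
    pil_draw.ellipse(xy=[98, 198, 102, 202], fill=(20, 20, 220))   # one filled landmark dot
    annotated_image = np.asarray(pil_image)       # back to a numpy array for the consumer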
@@ -313,8 +317,9 @@ def extract_face_data(frames_in: VideoFrameProducer,
                    else:
                        vcol = (20, 20, 220)

-                     cv2.circle(img=annotated_image, center=(int(lm_x), int(lm_y)), radius=norm_landmark_radius,
-                                color=vcol, thickness=norm_landmark_thickness)
+                     pil_draw.ellipse(xy=[lm_x - norm_landmark_radius, lm_y - norm_landmark_radius,
+                                          lm_x + norm_landmark_radius, lm_y + norm_landmark_radius],
+                                      fill=vcol)

                #
                # DEBUG: save the landmarks to a file
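cv2.circle and ImageDraw.ellipse parameterize a circle differently: OpenCV takes a center point and a radius, while Pillow takes the bounding box [x0, y0, x1, y1], so the radius is folded into the corner coordinates, as in this hypothetical helper (not part of the repo):

    def draw_filled_circle(draw, cx, cy, radius, color):
        # draw: a PIL.ImageDraw.ImageDraw; (cx, cy): center in pixels
        draw.ellipse(xy=[cx - radius, cy - radius, cx + radius, cy + radius], fill=color)

This is also why norm_landmark_thickness is deleted above: fill= produces a solid disc directly, replacing the old trick of drawing a circle whose stroke thickness equals its radius.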
@@ -323,7 +328,7 @@ def extract_face_data(frames_in: VideoFrameProducer,
                # pickle.dump(obj=lm_list, file=outfile)

                #
-                 # Draw the landmarks in the upper left corner of the image using a orthographic projection (i.e., use only x and y)
+                 # Draw the (normalized) landmarks in the upper left corner of the image using an orthographic projection (i.e., use only x and y)
                # and use the depth to modulate the color intensity.

                # First compute the dynamic range of the z coordinate among all points
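The z_min/z_range computation sits in lines elided between the hunks; the rescaling in the next hunk maps each depth into [0, 1] and uses it as a red intensity. A worked example with made-up values (in MediaPipe face landmarks, smaller z means closer to the camera):

    z_values = [-0.03, 0.01, 0.05]           # hypothetical landmark depths
    z_min = min(z_values)                    # -0.03
    z_range = max(z_values) - z_min          # 0.08
    norm_z = 1 - ((0.01 - z_min) / z_range)  # 1 - 0.5 = 0.5
    red = int(255 * norm_z)                  # 127; nearer landmarks come out brighter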
@@ -347,11 +352,13 @@ def extract_face_data(frames_in: VideoFrameProducer,
                    # rescale z in [0,1]
                    norm_z = 1 - ((lm_z - z_min) / z_range)

-                     cv2.circle(img=annotated_image, center=(int(lm_x), int(lm_y)), radius=norm_landmark_radius,
-                                color=(int(255 * norm_z), 20, 20), thickness=norm_landmark_thickness)
+                     pil_draw.ellipse(xy=[lm_x - norm_landmark_radius, lm_y - norm_landmark_radius,
+                                          lm_x + norm_landmark_radius, lm_y + norm_landmark_radius],
+                                      fill=(int(255 * norm_z), 20, 20))

            #
            # Finally, write the annotated frame to the output video
+             annotated_image = np.asarray(pil_image)  # Back from PIL to numpy array
            composite_frames_out.consume(annotated_image)

        frame_num += 1
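Finally, np.asarray converts the annotated PIL image back into the uint8 array the frame consumer expects; shape and dtype match the input frame. A quick sanity check under the same assumptions as the sketches above:

    annotated_image = np.asarray(pil_image)
    assert annotated_image.dtype == np.uint8
    assert annotated_image.shape == rgb_image.shape   # (H, W, 3), RGB order preserved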