 from PIL.Image import Image
 import PIL.Image
 from PIL import ImageDraw
+import PIL.ImageFont

 from .datagen import create_frame_producer, create_frame_consumer
 from .datagen import VideoFrameProducer, VideoFrameConsumer
@@ -224,6 +225,8 @@ def extract_face_data(frames_in: VideoFrameProducer,
     out_Rs = np.ndarray(shape=(0, 3, 3), dtype=np.float32)
     out_scales = np.ndarray(shape=(0,), dtype=np.float32)

+    pil_font = None
+
     frame_num = 0
     for rgb_image in frames_in.frames():

@@ -232,6 +235,11 @@ def extract_face_data(frames_in: VideoFrameProducer,
         width = rgb_image.shape[1]
         height = rgb_image.shape[0]

+        # Prepare the font size according to the video resolution
+        font_size = max(10, height // MEDIAPIPE_FACE_BLENDSHAPES_COUNT)
+        font_size = 32 if font_size > 32 else font_size
+        pil_font = PIL.ImageFont.load_default(size=font_size)
+
         #
         # RUN MEDIAPIPE FACE LANDMARKER
         mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_image)
@@ -288,22 +296,23 @@ def extract_face_data(frames_in: VideoFrameProducer,
         #
         # Process BLENDSHAPES
         if len(results.face_blendshapes) == 0:
-            frame_bshapes = [float('nan')] * MEDIAPIPE_FACE_BLENDSHAPES_COUNT
+            frame_bshapes = None
+            frame_bshapes_list = [float('nan')] * MEDIAPIPE_FACE_BLENDSHAPES_COUNT
         else:
             # Assume there is only one face
-            frame_bshapes_info = results.face_blendshapes[0]
-            frame_bshapes = [f.score for f in frame_bshapes_info]
+            frame_bshapes = results.face_blendshapes[0]
+            frame_bshapes_list = [f.score for f in frame_bshapes]

         # DEBUG code, also to generate the docs
         # print("MP_BLENDSHAPES=[", end="")
         # for i, bshape in enumerate(frame_bshapes_info):
         # print(f"\"{bshape.category_name}\", ", end="")  # {i} ") # {bshape.score:.3f}")
         # print("]", end="")

-        assert type(frame_bshapes) == list
-        assert len(frame_bshapes) == MEDIAPIPE_FACE_BLENDSHAPES_COUNT
+        assert type(frame_bshapes_list) == list
+        assert len(frame_bshapes_list) == MEDIAPIPE_FACE_BLENDSHAPES_COUNT

-        out_blendshapes_list.append(frame_bshapes)
+        out_blendshapes_list.append(frame_bshapes_list)

         #
         # Manage composite video output
@@ -315,7 +324,7 @@ def extract_face_data(frames_in: VideoFrameProducer,
             pil_draw = ImageDraw.Draw(pil_image)
             #draw.rectangle(xy=[bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]], outline=(220, 10, 10))

-
+            #
             # Draw face mesh landmarks on the overlay image.
             if landmarks is not None:

@@ -381,6 +390,22 @@ def extract_face_data(frames_in: VideoFrameProducer,
                                      lm_x + norm_landmark_radius, lm_y + norm_landmark_radius],
                                  fill=(int(255 * norm_z), 20, 20))

+            #
+            # Draw face blendshape values on the overlay image.
+            if frame_bshapes is not None:
+
+                for i, bshape_info in enumerate(frame_bshapes):
+                    bshape_name = bshape_info.category_name
+                    bshape_score = bshape_info.score
+
+                    # Draw the blendshape name and score in the upper left corner
+                    y_coord = font_size + font_size * i
+                    pil_draw.text(
+                        xy=(10, y_coord),
+                        text=f"{bshape_name}: {bshape_score:.3f}",
+                        fill=(255, 255, 255),
+                        font=pil_font)
+
             #
             # Finally, write the annotated frame to the output video
             annotated_image = np.asarray(pil_image)  # Back from PIL to numpy array
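
For reference, the following is a minimal, self-contained sketch of the blendshape overlay this patch introduces, so the drawing logic can be tried outside the pipeline. The blendshape count (52) and the (name, score) tuples are stand-ins assumed from the diff context (MEDIAPIPE_FACE_BLENDSHAPES_COUNT and the entries of results.face_blendshapes[0]); only the Pillow calls mirror the patch, and the size argument of ImageFont.load_default() requires Pillow 10.1 or newer.

import numpy as np
import PIL.Image
import PIL.ImageFont
from PIL import ImageDraw

MEDIAPIPE_FACE_BLENDSHAPES_COUNT = 52  # assumed value of the constant used in the patch


def draw_blendshape_overlay(rgb_image: np.ndarray, blendshapes) -> np.ndarray:
    """Draw one 'name: score' line per blendshape in the upper-left corner of the frame."""
    height = rgb_image.shape[0]

    # Scale the font so the whole list roughly fits vertically, clamped to [10, 32] px
    # (same rule as the patch, written with min() instead of a conditional expression).
    font_size = max(10, height // MEDIAPIPE_FACE_BLENDSHAPES_COUNT)
    font_size = min(32, font_size)
    pil_font = PIL.ImageFont.load_default(size=font_size)

    pil_image = PIL.Image.fromarray(rgb_image)
    pil_draw = ImageDraw.Draw(pil_image)
    for i, (name, score) in enumerate(blendshapes):
        pil_draw.text(xy=(10, font_size + font_size * i),
                      text=f"{name}: {score:.3f}",
                      fill=(255, 255, 255),
                      font=pil_font)

    return np.asarray(pil_image)  # back from PIL to a numpy array


# Usage with dummy data: a grey 640x480 frame and made-up scores.
frame = np.full((480, 640, 3), 128, dtype=np.uint8)
dummy_blendshapes = [(f"blendshape_{i}", i / MEDIAPIPE_FACE_BLENDSHAPES_COUNT)
                     for i in range(MEDIAPIPE_FACE_BLENDSHAPES_COUNT)]
annotated = draw_blendshape_overlay(frame, dummy_blendshapes)
print(annotated.shape)  # (480, 640, 3)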