Skip to content

Commit 0e19f62

Browse files
committed
Added overlay rendering of the blendshape values
1 parent 4bd98a3 commit 0e19f62

File tree

1 file changed

+32
-7
lines changed

1 file changed

+32
-7
lines changed

slvideotools/extract_face_data.py

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from PIL.Image import Image
1515
import PIL.Image
1616
from PIL import ImageDraw
17+
import PIL.ImageFont
1718

1819
from .datagen import create_frame_producer, create_frame_consumer
1920
from .datagen import VideoFrameProducer, VideoFrameConsumer
@@ -224,6 +225,8 @@ def extract_face_data(frames_in: VideoFrameProducer,
224225
out_Rs = np.ndarray(shape=(0, 3, 3), dtype=np.float32)
225226
out_scales = np.ndarray(shape=(0,), dtype=np.float32)
226227

228+
pil_font = None
229+
227230
frame_num = 0
228231
for rgb_image in frames_in.frames():
229232

@@ -232,6 +235,11 @@ def extract_face_data(frames_in: VideoFrameProducer,
232235
width = rgb_image.shape[1]
233236
height = rgb_image.shape[0]
234237

238+
# Prepare the font size according to the video resolution
239+
font_size = max(10, height // MEDIAPIPE_FACE_BLENDSHAPES_COUNT)
240+
font_size = 32 if font_size > 32 else font_size
241+
pil_font = PIL.ImageFont.load_default(size=font_size)
242+
235243
#
236244
# RUN MEDIAPIPE FACE LANDMARKER
237245
mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_image)
@@ -288,22 +296,23 @@ def extract_face_data(frames_in: VideoFrameProducer,
288296
#
289297
# Process BLENDSHAPES
290298
if len(results.face_blendshapes) == 0:
291-
frame_bshapes = [float('nan')] * MEDIAPIPE_FACE_BLENDSHAPES_COUNT
299+
frame_bshapes = None
300+
frame_bshapes_list = [float('nan')] * MEDIAPIPE_FACE_BLENDSHAPES_COUNT
292301
else:
293302
# Assume there is only one face
294-
frame_bshapes_info = results.face_blendshapes[0]
295-
frame_bshapes = [f.score for f in frame_bshapes_info]
303+
frame_bshapes = results.face_blendshapes[0]
304+
frame_bshapes_list = [f.score for f in frame_bshapes]
296305

297306
# DEBUG code, also to generate the docs
298307
# print("MP_BLENDSHAPES=[", end="")
299308
# for i, bshape in enumerate(frame_bshapes):
300309
# print(f"\"{bshape.category_name}\", ", end="") # {i} ") # {bshape.score:.3f}")
301310
# print("]", end="")
302311

303-
assert type(frame_bshapes) == list
304-
assert len(frame_bshapes) == MEDIAPIPE_FACE_BLENDSHAPES_COUNT
312+
assert type(frame_bshapes_list) == list
313+
assert len(frame_bshapes_list) == MEDIAPIPE_FACE_BLENDSHAPES_COUNT
305314

306-
out_blendshapes_list.append(frame_bshapes)
315+
out_blendshapes_list.append(frame_bshapes_list)
307316

308317
#
309318
# Manage composite video output
@@ -315,7 +324,7 @@ def extract_face_data(frames_in: VideoFrameProducer,
315324
pil_draw = ImageDraw.Draw(pil_image)
316325
#draw.rectangle(xy=[bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]], outline=(220, 10, 10))
317326

318-
327+
#
319328
# Draw face mesh landmarks on the overlay image.
320329
if landmarks is not None:
321330

@@ -381,6 +390,22 @@ def extract_face_data(frames_in: VideoFrameProducer,
381390
lm_x + norm_landmark_radius, lm_y + norm_landmark_radius],
382391
fill=(int(255 * norm_z), 20, 20))
383392

393+
#
394+
# Draw face blendshape values on the overlay image.
395+
if frame_bshapes is not None:
396+
397+
for i, bshape_info in enumerate(frame_bshapes):
398+
bshape_name = bshape_info.category_name
399+
bshape_score = bshape_info.score
400+
401+
# Draw the blendshape name and score in the upper left corner
402+
y_coord = font_size + font_size * i
403+
pil_draw.text(
404+
xy=(10, y_coord),
405+
text=f"{bshape_name}: {bshape_score:.3f}",
406+
fill=(255, 255, 255),
407+
font=pil_font)
408+
384409
#
385410
# Finally, write the annotated frame to the output video
386411
annotated_image = np.asarray(pil_image) # Back from PIL to numpy array

0 commit comments

Comments
 (0)