Commit 01f13be

committed
add face emotion and landmarks APP
1 parent 1be392c commit 01f13be

File tree: 12 files changed, +353 −1 lines

docs/doc/en/vision/face_landmarks.md

Lines changed: 43 additions & 0 deletions
@@ -72,5 +72,48 @@ while not app.need_exit():
- `landmarks_abs`: Specifies the coordinates of face keypoints in the original `img`. The `points` variable contains 478 keypoints in the order `x0, y0, x1, y1, ..., x477, y477`.
- `landmarks_rel`: Outputs coordinates in `img_std` and appends the results to the `points` variable.
- `points_z`: Represents depth estimation of the keypoints relative to the face's center of gravity. The closer to the camera, the larger the value; points behind the face's center have negative values. The values are proportional to the face's width.
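For instance, because larger `points_z` values mean closer to the camera, the keypoint nearest the camera can be picked with plain Python. This sketch uses made-up values and does not depend on the maix API; only the flat `x0, y0, x1, y1, ...` layout is taken from the description above:

```python
# Hypothetical values: one depth per keypoint, larger = closer to the camera,
# negative = behind the face's center of gravity.
points_z = [-0.1, 0.4, 0.9, 0.2]
points = [10, 20, 30, 40, 50, 60, 70, 80]  # x0, y0, x1, y1, ... matching layout

# index of the keypoint nearest the camera (largest depth value)
nearest = max(range(len(points_z)), key=lambda i: points_z[i])
x, y = points[nearest * 2], points[nearest * 2 + 1]
print(nearest, x, y)  # index 2 -> (50, 60)
```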

## Extracting Partial Keypoints

The 478 keypoints may be excessive for some applications. If you only need specific ones, you can select them by index, referring to the <a href="../../assets/maixcam_face_landmarks_full.jpg" target="_blank">high-resolution image</a>. Common subsets include:

**Note: These are for reference only. Please rely on the actual model output for accuracy.**

* **146 Keypoints:**

```python
sub_146_idxes = [0, 1, 4, 5, 6, 7, 8, 10, 13, 14, 17, 21, 33, 37, 39, 40, 46, 52, 53, 54, 55, 58, 61, 63, 65, 66, 67, 70, 78, 80,
                 81, 82, 84, 87, 88, 91, 93, 95, 103, 105, 107, 109, 127, 132, 133, 136, 144, 145, 146, 148, 149, 150, 152, 153,
                 154, 155, 157, 158, 159, 160, 161, 162, 163, 168, 172, 173, 176, 178, 181, 185, 191, 195, 197, 234, 246, 249,
                 251, 263, 267, 269, 270, 276, 282, 283, 284, 285, 288, 291, 293, 295, 296, 297, 300, 308, 310, 311, 312, 314,
                 317, 318, 321, 323, 324, 332, 334, 336, 338, 356, 361, 362, 365, 373, 374, 375, 377, 378, 379, 380, 381, 382,
                 384, 385, 386, 387, 388, 389, 390, 397, 398, 400, 402, 405, 409, 415, 454, 466, 468, 469, 470, 471, 472, 473,
                 474, 475, 476, 477]
```

* **68 Keypoints:**

```python
sub_68_idxes = [162, 234, 93, 58, 172, 136, 149, 148, 152, 377, 378, 365, 397, 288, 323, 454, 389, 71, 63, 105, 66, 107, 336,
                296, 334, 293, 301, 168, 197, 5, 4, 75, 97, 2, 326, 305, 33, 160, 158, 133, 153, 144, 362, 385, 387, 263, 373,
                380, 61, 39, 37, 0, 267, 269, 291, 405, 314, 17, 84, 181, 78, 82, 13, 312, 308, 317, 14, 87]
```

* **5 Keypoints:**

```python
sub_5_idxes = [468, 473, 4, 61, 291]
```

With these indices, you can use the following code to extract and display specific subsets of keypoints:

```python
def get_sub_landmarks(points, points_z, idxes):
    new_points = []
    new_points_z = []
    for i in idxes:
        new_points.append(points[i * 2])
        new_points.append(points[i * 2 + 1])
        new_points_z.append(points_z[i])
    return new_points, new_points_z

sub_xy, sub_z = get_sub_landmarks(res.points, res.points_z, sub_146_idxes)
landmarks_detector.draw_face(img, sub_xy, len(sub_z), sub_z)
```
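To see the index arithmetic in isolation, the same helper can be exercised on synthetic data, with no camera or model required (keypoint values here are made up):

```python
# Same helper as in the doc, run on 3 fake keypoints to show the
# flat x0, y0, x1, y1, ... layout handling.
def get_sub_landmarks(points, points_z, idxes):
    new_points = []
    new_points_z = []
    for i in idxes:
        new_points.append(points[i * 2])
        new_points.append(points[i * 2 + 1])
        new_points_z.append(points_z[i])
    return new_points, new_points_z

points = [0, 1, 10, 11, 20, 21]   # x0, y0, x1, y1, x2, y2
points_z = [0.5, -0.2, 0.8]
sub_xy, sub_z = get_sub_landmarks(points, points_z, [2, 0])
print(sub_xy, sub_z)  # [20, 21, 0, 1] [0.8, 0.5]
```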

docs/doc/zh/vision/face_landmarks.md

Lines changed: 40 additions & 1 deletion
@@ -60,7 +60,7 @@ while not app.need_exit():
         if count >= max_face_num:
             break
     for res in results:
-        landmarks_detector.draw_face(img, res.points, landmarks_detector.landmarks_num, res.points_z)
+        landmarks_detector.draw_face(img, res.points, len(res.points_z), res.points_z)
     disp.show(img)
 ```

@@ -71,3 +71,42 @@ while not app.need_exit():
* `points_z` is the depth estimate of the keypoints, relative to the face's center of gravity: the closer to the camera, the larger the value, negative behind the face's center, and proportional to the face's width.

## Extracting Partial Keypoints

478 keypoints can be more than you need. If you only want a few of them, you can pick a subset by index from the <a href="../../assets/maixcam_face_landmarks_full.jpg" target="_blank">high-resolution image</a>. Common subsets:

**Note: these are for reference only; the actual model output takes precedence.**

* 146 points:
```python
sub_146_idxes = [0, 1, 4, 5, 6, 7, 8, 10, 13, 14, 17, 21, 33, 37, 39, 40, 46, 52, 53, 54, 55, 58, 61, 63, 65, 66, 67, 70, 78, 80,
                 81, 82, 84, 87, 88, 91, 93, 95, 103, 105, 107, 109, 127, 132, 133, 136, 144, 145, 146, 148, 149, 150, 152, 153, 154, 155, 157,
                 158, 159, 160, 161, 162, 163, 168, 172, 173, 176, 178, 181, 185, 191, 195, 197, 234, 246, 249, 251, 263, 267, 269, 270, 276, 282,
                 283, 284, 285, 288, 291, 293, 295, 296, 297, 300, 308, 310, 311, 312, 314, 317, 318, 321, 323, 324, 332, 334, 336, 338, 356, 361,
                 362, 365, 373, 374, 375, 377, 378, 379, 380, 381, 382, 384, 385, 386, 387, 388, 389, 390, 397, 398, 400, 402, 405,
                 409, 415, 454, 466, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477]
```
* 68 points:
```python
sub_68_idxes = [162, 234, 93, 58, 172, 136, 149, 148, 152, 377, 378, 365, 397, 288, 323, 454, 389, 71, 63, 105, 66, 107, 336,
                296, 334, 293, 301, 168, 197, 5, 4, 75, 97, 2, 326, 305, 33, 160, 158, 133, 153, 144, 362, 385, 387, 263, 373,
                380, 61, 39, 37, 0, 267, 269, 291, 405, 314, 17, 84, 181, 78, 82, 13, 312, 308, 317, 14, 87]
```
* 5 points:
```python
sub_5_idxes = [468, 473, 4, 61, 291]
```

With these indices, we can extract a subset and display it:
```python
def get_sub_landmarks(points, points_z, idxes):
    new_points = []
    new_points_z = []
    for i in idxes:
        new_points.append(points[i*2])
        new_points.append(points[i*2 + 1])
        new_points_z.append(points_z[i])
    return new_points, new_points_z

sub_xy, sub_z = get_sub_landmarks(res.points, res.points_z, sub_146_idxes)
landmarks_detector.draw_face(img, sub_xy, len(sub_z), sub_z)
```
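Since each subset index must address one of the model's 478 keypoints, a quick sanity check can catch out-of-range or duplicate indices before use. This is an illustrative helper in plain Python, not part of the maix API:

```python
sub_5_idxes = [468, 473, 4, 61, 291]  # the 5-point subset from above

def check_idxes(idxes, num_landmarks=478):
    # every index must be in [0, num_landmarks) and appear only once
    in_range = all(0 <= i < num_landmarks for i in idxes)
    unique = len(set(idxes)) == len(idxes)
    return in_range and unique

print(check_idxes(sub_5_idxes))  # True
print(check_idxes([468, 478]))   # False: 478 is out of range
```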
Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+data
+__pycache__
+dist
Lines changed: 7 additions & 0 deletions
@@ -0,0 +1,7 @@
+Face landmarks detection
+=====
+
+
+
+visit https://wiki.sipeed.com/maixpy/doc/zh/vision/face_landmarks.html
+

projects/app_face_emotion/app.yaml

Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
+id: face_emotion
+name: Face Emotion
+name[zh]: 人脸情绪
+version: 1.0.0
+icon: icon.png
+author: neucrack@Sipeed Ltd
+desc: Detect face emotion
+desc[zh]: 检测人脸情绪和表情
+exclude:
+  - data
+  - dist
+  - .gitignore

projects/app_face_emotion/icon.png

1.89 KB

projects/app_face_emotion/main.py

Lines changed: 118 additions & 0 deletions
@@ -0,0 +1,118 @@
```python
from maix import camera, display, image, nn, app, time, touchscreen
import math


models = {
    "7 classes": "/root/models/face_emotion.mud"
}
models_keys = list(models.keys())
curr_model = 0

def is_in_button(x, y, btn_pos):
    return x > btn_pos[0] and x < btn_pos[0] + btn_pos[2] and y > btn_pos[1] and y < btn_pos[1] + btn_pos[3]

def main(disp):
    global curr_model

    detect_conf_th = 0.5
    detect_iou_th = 0.45
    emotion_conf_th = 0.5
    max_face_num = -1
    crop_scale = 1.2

    # face detection model
    detector = nn.YOLOv8(model="/root/models/yolov8n_face.mud", dual_buff=False)
    # we only use its crop function to crop faces from the image, so we don't actually load a model here
    landmarks_detector = nn.FaceLandmarks(model="")
    # emotion classification model
    classifier = nn.Classifier(model=models[models_keys[curr_model]], dual_buff=False)
    cam = camera.Camera(detector.input_width(), detector.input_height(), detector.input_format())

    mode_pressed = False
    ts = touchscreen.TouchScreen()
    img_back = image.load("/maixapp/share/icon/ret.png")
    back_rect = [0, 0, 32, 32]
    mode_rect = [0, cam.height() - 26, image.string_size(models_keys[curr_model]).width() + 6, 30]
    back_rect_disp = image.resize_map_pos(cam.width(), cam.height(), disp.width(), disp.height(), image.Fit.FIT_CONTAIN, back_rect[0], back_rect[1], back_rect[2], back_rect[3])
    mode_rect_disp = image.resize_map_pos(cam.width(), cam.height(), disp.width(), disp.height(), image.Fit.FIT_CONTAIN, mode_rect[0], mode_rect[1], mode_rect[2], mode_rect[3])

    # for drawing result info
    max_labels_length = 0
    for label in classifier.labels:
        size = image.string_size(label)
        if size.width() > max_labels_length:
            max_labels_length = size.width()

    max_score_length = cam.width() / 4

    while not app.need_exit():
        img = cam.read()
        results = []
        objs = detector.detect(img, conf_th=detect_conf_th, iou_th=detect_iou_th, sort=1)
        count = 0
        idxes = []
        img_std_first: image.Image = None
        for i, obj in enumerate(objs):
            img_std = landmarks_detector.crop_image(img, obj.x, obj.y, obj.w, obj.h, obj.points,
                                                    classifier.input_width(), classifier.input_height(), crop_scale)
            if img_std:
                img_std_gray = img_std.to_format(image.Format.FMT_GRAYSCALE)
                res = classifier.classify(img_std_gray, softmax=True)
                results.append(res)
                idxes.append(i)
                if i == 0:
                    img_std_first = img_std
                count += 1
                if max_face_num > 0 and count >= max_face_num:
                    break
        for i, res in enumerate(results):
            # draw detailed info for the first face
            if i == 0:
                img.draw_image(0, 0, img_std_first)
                for j in range(len(classifier.labels)):
                    idx = res[j][0]
                    score = res[j][1]
                    img.draw_string(0, img_std_first.height() + idx * 16, classifier.labels[idx], image.COLOR_WHITE)
                    img.draw_rect(max_labels_length, int(img_std_first.height() + idx * 16), int(score * max_score_length), 8, image.COLOR_GREEN if score >= emotion_conf_th else image.COLOR_RED, -1)
                    img.draw_string(int(max_labels_length + score * max_score_length + 2), int(img_std_first.height() + idx * 16), f"{score:.1f}", image.COLOR_RED)
            # draw on every face
            color = image.COLOR_GREEN if res[0][1] >= emotion_conf_th else image.COLOR_RED
            obj = objs[idxes[i]]
            img.draw_rect(obj.x, obj.y, obj.w, obj.h, color, 1)
            img.draw_string(obj.x, obj.y, f"{classifier.labels[res[0][0]]}: {res[0][1]:.1f}", color)

        img.draw_image(0, 0, img_back)
        img.draw_rect(mode_rect[0], mode_rect[1], mode_rect[2], mode_rect[3], image.COLOR_WHITE)
        img.draw_string(4, img.height() - 20, f"{models_keys[curr_model]}")
        disp.show(img)
        x, y, pressed = ts.read()
        if pressed:
            mode_pressed = True
        elif mode_pressed:
            mode_pressed = False
            if is_in_button(x, y, back_rect_disp):
                app.set_exit_flag(True)
            if is_in_button(x, y, mode_rect_disp):
                curr_model = (curr_model + 1) % len(models_keys)
                msg = "switching model ..."
                size = image.string_size(msg, scale=1.3)
                img.draw_string((img.width() - size.width()) // 2, (img.height() - size.height()) // 2, msg, image.COLOR_RED, scale=1.3, thickness=-3)
                img.draw_string((img.width() - size.width()) // 2, (img.height() - size.height()) // 2, msg, image.COLOR_WHITE, scale=1.3)
                disp.show(img)
                del detector
                del landmarks_detector
                break

disp = display.Display()
try:
    while not app.need_exit():
        main(disp)
except Exception:
    import traceback
    msg = traceback.format_exc()
    img = image.Image(disp.width(), disp.height())
    img.draw_string(0, 0, msg, image.COLOR_WHITE)
    disp.show(img)
    while not app.need_exit():
        time.sleep_ms(100)
```
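In the loop above, the classifier result `res` is consumed as a list of `(class_index, score)` pairs sorted by score, highest first; that shape is assumed here, not documented in this diff. A minimal pure-Python sketch of how the top-1 label and its confidence check are derived, with made-up labels and scores:

```python
# Hypothetical label set and softmax output, shaped like the app assumes:
# [(class_index, score), ...] sorted by score in descending order.
labels = ["angry", "happy", "neutral"]
res = [(1, 0.7), (2, 0.2), (0, 0.1)]

emotion_conf_th = 0.5
top_idx, top_score = res[0]          # best-scoring class comes first
top_label = labels[top_idx]
confident = top_score >= emotion_conf_th  # green box vs. red box in the app
print(top_label, confident)  # happy True
```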
Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+data
+__pycache__
+dist
Lines changed: 7 additions & 0 deletions
@@ -0,0 +1,7 @@
+Face landmarks detection
+=====
+
+
+
+visit https://wiki.sipeed.com/maixpy/doc/zh/vision/face_landmarks.html
+
Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
+id: face_landmarks
+name: Face Landmarks
+name[zh]: 人脸关键点
+version: 1.0.0
+icon: icon.png
+author: neucrack@Sipeed Ltd
+desc: Detect face landmarks
+desc[zh]: 检测人脸关键点
+exclude:
+  - data
+  - dist
+  - .gitignore
