Merge pull request #75 from HyperInspire/dev/cuda

tunmx · web-flow · commit 33299cee04be · 2024-07-06T23:35:02.000+08:00
Dev/cuda Former-commit-id: 87377ab
diff --git a/command/build_linux_cuda.sh b/command/build_linux_cuda.sh
@@ -41,8 +41,9 @@ cmake -DCMAKE_SYSTEM_NAME=Linux \
   -DISF_BUILD_WITH_SAMPLE=ON \
   -DISF_BUILD_WITH_TEST=ON \
   -DISF_ENABLE_BENCHMARK=ON \
-  -DISF_ENABLE_USE_LFW_DATA=ON \
-  -DISF_ENABLE_TEST_EVALUATION=ON \
+  -DISF_ENABLE_USE_LFW_DATA=OFF \
+  -DISF_ENABLE_TEST_EVALUATION=OFF \
+  -DISF_ENABLE_TRACKING_BY_DETECTION=ON \
   -DMNN_CUDA=ON \
   -DISF_GLOBAL_INFERENCE_BACKEND_USE_MNN_CUDA=ON \
   -DISF_LINUX_MNN_CUDA=/home/tunm/softwate/MNN-2.7.2/build_cuda ${SCRIPT_DIR}
diff --git a/cpp/sample/cpp/sample_face_track_video.cpp b/cpp/sample/cpp/sample_face_track_video.cpp
@@ -2,6 +2,8 @@
 #include "c_api/intypedef.h"
 #include "opencv2/opencv.hpp"
 #include "inspireface/c_api/inspireface.h"
+#include <unordered_map>
+#include <functional>
 
 void drawMode(cv::Mat& frame, HFDetectMode mode) {
     std::string modeText;
@@ -19,9 +21,26 @@ void drawMode(cv::Mat& frame, HFDetectMode mode) {
             modeText = "Mode: Unknown";
             break;
     }
-    cv::putText(frame, modeText, cv::Point(10, 30), cv::FONT_HERSHEY_SIMPLEX, 1.0, cv::Scalar(255, 255, 255), 2);
+    cv::putText(frame, modeText, cv::Point(10, 30), cv::FONT_HERSHEY_SIMPLEX, 1.0, cv::Scalar(90, 100, 255), 2);
 }
 
+// Map a track ID to a repeatable BGR drawing color; colors repeat every 100 IDs.
+cv::Scalar generateColor(int id) {
+    constexpr int kMaxId = 100;
+    // Wrap into [0, kMaxId); the extra add/mod keeps negative IDs in range too.
+    const int slot = ((id % kMaxId) + kMaxId) % kMaxId;
+    // 8-bit OpenCV HSV stores hue as degrees/2 in [0, 180), so scale to 180, not 360.
+    const int hue = slot * 180 / kMaxId;
+    const int saturation = 255;
+    const int value = 200;
+    cv::Mat hsv(1, 1, CV_8UC3, cv::Scalar(hue, saturation, value));
+    cv::Mat bgr;
+    cv::cvtColor(hsv, bgr, cv::COLOR_HSV2BGR);
+    const cv::Vec3b pixel = bgr.at<cv::Vec3b>(0, 0);
+    return cv::Scalar(pixel[0], pixel[1], pixel[2]);
+}
+
+
 int main(int argc, char* argv[]) {
     // Check whether the number of parameters is correct
     if (argc != 3) {
@@ -46,11 +65,11 @@ int main(int argc, char* argv[]) {
     // Enable the functions in the pipeline: mask detection, live detection, and face quality detection
     HOption option = HF_ENABLE_QUALITY | HF_ENABLE_MASK_DETECT | HF_ENABLE_INTERACTION;
     // Video or frame sequence mode uses VIDEO-MODE, which is face detection with tracking
-    HFDetectMode detMode = HF_DETECT_MODE_LIGHT_TRACK;
+    HFDetectMode detMode = HF_DETECT_MODE_TRACK_BY_DETECTION;
     // Maximum number of faces detected
     HInt32 maxDetectNum = 20;
     // Face detection image input level
-    HInt32 detectPixelLevel = 160;
+    HInt32 detectPixelLevel = 640;
     // fps in tracking-by-detection mode
     HInt32 trackByDetectFps = 20;
     HFSession session = {0};
@@ -122,35 +141,36 @@ int main(int argc, char* argv[]) {
 
         // Draw detection mode on the frame
         drawMode(draw, detMode);
-        if (faceNum > 0) {
-            ret = HFMultipleFacePipelineProcessOptional(session, imageHandle, &multipleFaceData, option);
-            if (ret != HSUCCEED)
-            {   
-                std::cout << "HFMultipleFacePipelineProcessOptional error: " << ret << std::endl;
-                return ret;
-            }
-            HFFaceIntereactionResult result;
-            ret = HFGetFaceIntereactionResult(session, &result);
-             if (ret != HSUCCEED)
-            {   
-                std::cout << "HFGetFaceIntereactionResult error: " << ret << std::endl;
-                return ret;
-            }
-            std::cout << "Left eye status: " << result.leftEyeStatusConfidence[0] << std::endl;
-            std::cout << "Righ eye status: " << result.rightEyeStatusConfidence[0] << std::endl;
-
-        }
+        // if (faceNum > 0) {
+        //     ret = HFMultipleFacePipelineProcessOptional(session, imageHandle, &multipleFaceData, option);
+        //     if (ret != HSUCCEED)
+        //     {   
+        //         std::cout << "HFMultipleFacePipelineProcessOptional error: " << ret << std::endl;
+        //         return ret;
+        //     }
+        //     HFFaceIntereactionResult result;
+        //     ret = HFGetFaceIntereactionResult(session, &result);
+        //      if (ret != HSUCCEED)
+        //     {   
+        //         std::cout << "HFGetFaceIntereactionResult error: " << ret << std::endl;
+        //         return ret;
+        //     }
+        //     std::cout << "Left eye status: " << result.leftEyeStatusConfidence[0] << std::endl;
+        //     std::cout << "Righ eye status: " << result.rightEyeStatusConfidence[0] << std::endl;
+
+        // }
         
         for (int index = 0; index < faceNum; ++index) {
             // std::cout << "========================================" << std::endl;
             // std::cout << "Process face index: " << index << std::endl;
+            // Track ID of this face; in VIDEO-MODE it stays fixed per face, but it may be lost
+            auto trackId = multipleFaceData.trackIds[index];
+
             // Use OpenCV's Rect to receive face bounding boxes
             auto rect = cv::Rect(multipleFaceData.rects[index].x, multipleFaceData.rects[index].y,
                                  multipleFaceData.rects[index].width, multipleFaceData.rects[index].height);
-            cv::rectangle(draw, rect, cv::Scalar(0, 100, 255), 5);
+            cv::rectangle(draw, rect, generateColor(trackId), 3);
 
-            // Print FaceID, In VIDEO-MODE it is fixed, but it may be lost
-            auto trackId = multipleFaceData.trackIds[index];
             // std::cout << "FaceID: " << trackId << std::endl;
 
             // Print Head euler angle, It can often be used to judge the quality of a face by the Angle of the head
@@ -160,7 +180,7 @@ int main(int argc, char* argv[]) {
 
             // Add TrackID to the drawing
             cv::putText(draw, "ID: " + std::to_string(trackId), cv::Point(rect.x, rect.y - 10),
-                        cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 255, 0), 2);
+                        cv::FONT_HERSHEY_SIMPLEX, 0.5, generateColor(trackId), 2);
 
             HInt32 numOfLmk;
             HFGetNumOfFaceDenseLandmark(&numOfLmk);
@@ -172,7 +192,7 @@ int main(int argc, char* argv[]) {
             }
             for (size_t i = 0; i < numOfLmk; i++) {
                 cv::Point2f p(denseLandmarkPoints[i].x, denseLandmarkPoints[i].y);
-                cv::circle(draw, p, 0, (0, 0, 255), 2);
+                cv::circle(draw, p, 0, generateColor(trackId), 2);
             }
         }
         
diff --git a/python/sample_face_track_from_video.py b/python/sample_face_track_from_video.py
@@ -4,11 +4,36 @@
 from inspireface.param import *
 import numpy as np
 
+
+def generate_color(id):
+    """
+    Generate a bright, repeatable color for an integer ID (50 distinct hues).
+
+    Args:
+        id (int): The ID for which to generate a color; wrapped modulo 50.
+
+    Returns:
+        tuple: A tuple of three ints representing the color in BGR format.
+    """
+    max_id = 50  # Number of unique colors
+    id = id % max_id
+
+    # 8-bit OpenCV HSV keeps hue as degrees/2 in [0, 180), so spread IDs over
+    # 180 steps; a 0-359 hue overflows uint8 and falls outside OpenCV's range.
+    hue = (id * 180) // max_id
+    saturation = 200  # the earlier "200 + (55 * id) % 55" always evaluated to 200
+    value = 200  # same constant as before; keeps the colors bright
+
+    hsv_color = np.uint8([[[hue, saturation, value]]])
+    bgr_color = cv2.cvtColor(hsv_color, cv2.COLOR_HSV2BGR)[0][0]
+    return (int(bgr_color[0]), int(bgr_color[1]), int(bgr_color[2]))
+
 @click.command()
 @click.argument("resource_path")
 @click.argument('source')
 @click.option('--show', is_flag=True, help='Display the video stream or video file in a window.')
-def case_face_tracker_from_video(resource_path, source, show):
+@click.option('--out', type=str, default=None, help='Path to save the processed video.')
+def case_face_tracker_from_video(resource_path, source, show, out):
     """
     Launch a face tracking process from a video source. The 'source' can either be a webcam index (0, 1, ...)
     or a path to a video file. Use the --show option to display the video.
@@ -17,6 +42,7 @@ def case_face_tracker_from_video(resource_path, source, show):
         resource_path (str): Path to the resource directory for face tracking algorithms.
         source (str): Webcam index or path to the video file.
         show (bool): If set, the video will be displayed in a window.
+        out (str): Path to save the processed video.
     """
     # Initialize the face tracker or other resources.
     print(f"Initializing with resources from: {resource_path}")
@@ -26,8 +52,8 @@ def case_face_tracker_from_video(resource_path, source, show):
 
     # Optional features, loaded during session creation based on the modules specified.
     opt = HF_ENABLE_NONE
-    session = ifac.InspireFaceSession(opt, HF_DETECT_MODE_LIGHT_TRACK)    # Use video mode
-
+    session = ifac.InspireFaceSession(opt, HF_DETECT_MODE_TRACK_BY_DETECTION, max_detect_num=25, detect_pixel_level=640)    # Use video mode
+    session.set_filter_minimum_face_pixel_size(0)
     # Determine if the source is a digital webcam index or a video file path.
     try:
         source_index = int(source)  # Try to convert source to an integer.
@@ -42,6 +68,15 @@ def case_face_tracker_from_video(resource_path, source, show):
         print("Error: Could not open video source.")
         return
 
+    # VideoWriter to save the processed video if out is provided.
+    if out:
+        fourcc = cv2.VideoWriter_fourcc(*'XVID')
+        fps = cap.get(cv2.CAP_PROP_FPS) if cap.get(cv2.CAP_PROP_FPS) > 0 else 30
+        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+        out_video = cv2.VideoWriter(out, fourcc, fps, (frame_width, frame_height))
+        print(f"Saving video to: {out}")
+
     # Main loop to process video frames.
     while True:
         ret, frame = cap.read()
@@ -62,31 +97,44 @@ def case_face_tracker_from_video(resource_path, source, show):
             # Calculate center, size, and angle
             center = ((x1 + x2) / 2, (y1 + y2) / 2)
             size = (x2 - x1, y2 - y1)
-            angle = face.roll 
-
-            # Get rotation matrix
-            rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
+            angle = face.roll
 
             # Apply rotation to the bounding box corners
             rect = ((center[0], center[1]), (size[0], size[1]), angle)
             box = cv2.boxPoints(rect)
             box = box.astype(int)
 
+            color = generate_color(face.track_id)
+
             # Draw the rotated bounding box
-            cv2.drawContours(frame, [box], 0, (100, 180, 29), 2)
+            cv2.drawContours(frame, [box], 0, color, 4)
 
             # Draw landmarks
             lmk = session.get_face_dense_landmark(face)
             for x, y in lmk.astype(int):
-                cv2.circle(frame, (x, y), 0, (220, 100, 0), 2)
+                cv2.circle(frame, (x, y), 0, color, 4)
+
+            # Draw track ID at the top of the bounding box
+            text = f"ID: {face.track_id}"
+            text_size, _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
+            text_x = min(box[:, 0])
+            text_y = min(box[:, 1]) - 10
+            if text_y < 0:
+                text_y = min(box[:, 1]) + text_size[1] + 10
+            cv2.putText(frame, text, (text_x, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
 
         if show:
             cv2.imshow("Face Tracker", frame)
             if cv2.waitKey(1) & 0xFF == ord('q'):
                 break  # Exit loop if 'q' is pressed.
 
+        if out:
+            out_video.write(frame)
+
     # Cleanup: release video capture and close any open windows.
     cap.release()
+    if out:
+        out_video.release()
     cv2.destroyAllWindows()
     print("Released all resources and closed windows.")