Merge pull request #71 from HyperInspire/dev/detection

tunmx · web-flow · commit 9b87be466d4b · 2024-07-02T22:40:39.000+08:00
Dev/detection Former-commit-id: 12004ab
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -9,7 +9,7 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
 # Current version
 set(INSPIRE_FACE_VERSION_MAJOR 1)
 set(INSPIRE_FACE_VERSION_MINOR 1)
-set(INSPIRE_FACE_VERSION_PATCH 1)
+set(INSPIRE_FACE_VERSION_PATCH 2)
 
 # Converts the version number to a string
 string(CONCAT INSPIRE_FACE_VERSION_MAJOR_STR ${INSPIRE_FACE_VERSION_MAJOR})
@@ -108,6 +108,7 @@ if (APPLE)
     endif ()
 else()
     if (ISF_BUILD_LINUX_ARM7 OR ISF_BUILD_LINUX_AARCH64)
+        set(DISABLE_GUI ON)
         add_definitions("-DDISABLE_GUI")
         #        set(OpenCV_DIR ${ISF_THIRD_PARTY_DIR}/opencv/opencv-linux-armhf/share/OpenCV)
         #        set(OpenCV_STATIC_INCLUDE_DIR ${PATH_3RDPARTY}/opencv/opencv-linux-armhf/include/)
diff --git a/README.md b/README.md
@@ -8,7 +8,9 @@ If you require further information on tracking development branches, CI/CD proce
 
 Please contact [contact@insightface.ai](mailto:contact@insightface.ai?subject=InspireFace) for commercial support, including obtaining and integrating higher accuracy models, as well as custom development.
 
-## ChangeLogs
+## Change Logs
+
+**`2024-07-02`** Fixed several bugs in the face detector with multi-level input.
 
 **`2024-06-27`** Verified iOS usability and fixed some bugs.
 
@@ -69,6 +71,9 @@ The '3rdparty' directory already includes the MNN library and specifies a partic
         - Prepare the cross-compilation toolchain in advance, such as gcc-arm-8.3-2019.03-x86_64-arm-linux-gnueabihf
 - CUDA (version 10.1 or higher)
     - GPU-based inference requires installing NVIDIA's CUDA dependencies on the device.
+- Eigen3
+    - If you need to use the tracking-by-detection feature, you must have Eigen3 installed in advance.
+
 - RKNN
     - Adjust and select versions currently supported for specific requirements.
 
@@ -357,7 +362,7 @@ For different scenarios, we currently provide several Packs, each containing mul
 
 | Name | Supported Devices | Note | Link |
 | --- | --- | --- | --- |
-| Pikachu | CPU | Lightweight edge-side model | [GDrive](https://drive.google.com/file/d/1i4uC-dZTQxdVgn2rP0ZdfJTMkJIXgYY4/view?usp=drive_link) |
-| Megatron | CPU, GPU | Local or server-side model | [GDrive](https://drive.google.com/file/d/1i4uC-dZTQxdVgn2rP0ZdfJTMkJIXgYY4/view?usp=drive_link) |
-| Gundam-RV1109 | RKNPU | Supports RK1109 and RK1126 | [GDrive](https://drive.google.com/file/d/1i4uC-dZTQxdVgn2rP0ZdfJTMkJIXgYY4/view?usp=drive_link) |
+| Pikachu | CPU | Lightweight edge-side model | [GDrive](https://drive.google.com/drive/folders/1krmv9Pj0XEZXR1GRPHjW_Sl7t4l0dNSS?usp=sharing) |
+| Megatron | CPU, GPU | Local or server-side model | [GDrive](https://drive.google.com/drive/folders/1krmv9Pj0XEZXR1GRPHjW_Sl7t4l0dNSS?usp=sharing) |
+| Gundam-RV1109 | RKNPU | Supports RK1109 and RK1126 | [GDrive](https://drive.google.com/drive/folders/1krmv9Pj0XEZXR1GRPHjW_Sl7t4l0dNSS?usp=sharing) |
 
diff --git a/command/build_android.sh b/command/build_android.sh
@@ -39,14 +39,14 @@ reorganize_structure() {
                     ;;
                 sample)
                     # Copy the sample directory
-                    if [ -d "$arch_dir/sample" ]; then
-                        cp -r "$arch_dir/sample/"* "$base_path/$main_dir/$arch/"
+                    if [ -d "$arch_dir/InspireFace/sample" ]; then
+                        cp -r "$arch_dir/InspireFace/sample/"* "$base_path/$main_dir/$arch/"
                     fi
                     ;;
                 test)
                     # Copy the test directory
-                    if [ -d "$arch_dir/test" ]; then
-                        cp -r "$arch_dir/test/"* "$base_path/$main_dir/$arch/"
+                    if [ -d "$arch_dir/InspireFace/test" ]; then
+                        cp -r "$arch_dir/InspireFace/test/"* "$base_path/$main_dir/$arch/"
                     fi
                     ;;
             esac
diff --git a/command/build_cross_rv1109rv1126_armhf.sh b/command/build_cross_rv1109rv1126_armhf.sh
@@ -55,7 +55,7 @@ cmake -DCMAKE_SYSTEM_NAME=Linux \
   -DISF_ENABLE_TEST_EVALUATION=OFF \
   -DISF_BUILD_SHARED_LIBS=ON ${SCRIPT_DIR}
 
-make -j4
+make -j1
 make install
 
 move_install_files "$(pwd)"
diff --git a/cpp/inspireface/c_api/inspireface.h b/cpp/inspireface/c_api/inspireface.h
@@ -171,7 +171,7 @@ HYPER_CAPI_EXPORT extern HResult HFCreateInspireFaceSession(
  * @param detectMode Detection mode to be used.
  * @param maxDetectFaceNum Maximum number of faces to detect.
  * @param detectPixelLevel Modify the input resolution level of the detector, the larger the better, 
- *          the need to input a multiple of 160, such as 160, 320, 640, the default value -1 is 160.
+ *          the value should be 160, 320 or 640 (other values fall back to the closest of these); the default value -1 selects 320.
  * @param trackByDetectModeFPS If you are using the MODE_TRACK_BY_DETECTION tracking mode, 
  *          this value is used to set the fps frame rate of your current incoming video stream, which defaults to -1 at 30fps.
  * @param handle Pointer to the context handle that will be returned.
diff --git a/cpp/inspireface/information.h b/cpp/inspireface/information.h
@@ -7,6 +7,6 @@
 
 #define INSPIRE_FACE_VERSION_MAJOR_STR "1"
 #define INSPIRE_FACE_VERSION_MINOR_STR "1"
-#define INSPIRE_FACE_VERSION_PATCH_STR "1"
+#define INSPIRE_FACE_VERSION_PATCH_STR "2"
 
 #endif //HYPERFACEREPO_INFORMATION_H
diff --git a/cpp/inspireface/middleware/utils.h b/cpp/inspireface/middleware/utils.h
@@ -643,14 +643,13 @@ inline cv::Rect GetNewBox(int src_w, int src_h, cv::Rect bbox, float scale) {
 
 
 template<typename T>
-inline bool isShortestSideGreaterThan(const cv::Rect_<T>& rect, T value) {
+inline bool isShortestSideGreaterThan(const cv::Rect_<T>& rect, T value, float scale) {
     // Find the shortest edge
-    T shortestSide = std::min(rect.width, rect.height);
+    T shortestSide = std::min(rect.width / scale, rect.height / scale);
     // Determines whether the shortest edge is greater than the given value
     return shortestSide > value;
 }
 
-
 }   // namespace inspire
 
 #endif
diff --git a/cpp/inspireface/track_module/face_track.cpp b/cpp/inspireface/track_module/face_track.cpp
@@ -342,7 +342,7 @@ void FaceTrack::DetectFace(const cv::Mat &input, float scale) {
             Object obj;
             const auto box = boxes[i];
             obj.rect = Rect_<float>(box.x1, box.y1, box.x2 - box.x1, box.y2 - box.y1);
-            if (!isShortestSideGreaterThan<float>(obj.rect, filter_minimum_face_px_size)) {
+            if (!isShortestSideGreaterThan<float>(obj.rect, filter_minimum_face_px_size, scale)) {
                 // Filter too small face detection box
                 continue;
             }
@@ -364,8 +364,8 @@ void FaceTrack::DetectFace(const cv::Mat &input, float scale) {
         for (int i = 0; i < boxes.size(); i++) {
             bbox[i] = cv::Rect(cv::Point(static_cast<int>(boxes[i].x1), static_cast<int>(boxes[i].y1)),
                             cv::Point(static_cast<int>(boxes[i].x2), static_cast<int>(boxes[i].y2)));
-
-            if (!isShortestSideGreaterThan<float>(bbox[i], filter_minimum_face_px_size)) {
+    
+            if (!isShortestSideGreaterThan<float>(bbox[i], filter_minimum_face_px_size, scale)) {
                 // Filter too small face detection box
                 continue;
             }
@@ -378,16 +378,14 @@ void FaceTrack::DetectFace(const cv::Mat &input, float scale) {
             
             FaceObject faceinfo(tracking_idx_, bbox[i], FaceLandmark::NUM_OF_LANDMARK);
             faceinfo.detect_bbox_ = bbox[i];
-
+            
             // Control that the number of faces detected does not exceed the maximum limit
-            if (candidate_faces_.size() < max_detected_faces_) {
-                candidate_faces_.push_back(faceinfo);
-            } else {
-                // If the maximum limit is exceeded, you can choose to discard the currently detected face or choose the face to discard according to the policy
-                // For example, face confidence can be compared and faces with lower confidence can be discarded
-                // Take the example of simply discarding the last face
-                candidate_faces_.pop_back();
+            if (candidate_faces_.size() >= max_detected_faces_)
+            {
+                continue;
             }
+            
+            candidate_faces_.push_back(faceinfo);
         }
     }
     
@@ -396,9 +394,10 @@ void FaceTrack::DetectFace(const cv::Mat &input, float scale) {
 int FaceTrack::Configuration(inspire::InspireArchive &archive) {
     // Initialize the detection model
     InspireModel detModel;
-    auto ret = archive.LoadModel("face_detect", detModel);
+    auto scheme = ChoiceMultiLevelDetectModel(m_dynamic_detection_input_level_);
+    auto ret = archive.LoadModel(scheme, detModel);
     if (ret != SARC_SUCCESS) {
-        INSPIRE_LOGE("Load %s error: %d", "face_detect", ret);
+        INSPIRE_LOGE("Load %s error: %d", "face_detect_320", ret);
         return HERR_ARCHIVE_LOAD_MODEL_FAILURE;
     }
     InitDetectModel(detModel);
@@ -444,21 +443,9 @@ int FaceTrack::InitLandmarkModel(InspireModel &model) {
 
 int FaceTrack::InitDetectModel(InspireModel &model) {
     std::vector<int> input_size;
-    if (m_dynamic_detection_input_level_ != -1) {
-        if (m_dynamic_detection_input_level_ % 160 != 0 || m_dynamic_detection_input_level_ < 160) {
-            INSPIRE_LOGE("The input size '%d' for the custom detector is not valid.  \
-            Please use a multiple of 160 (minimum 160) for the input dimensions, such as 320 or 640.", m_dynamic_detection_input_level_);
-            return HERR_INVALID_DETECTION_INPUT;
-        }
-        // Wide-Range mode temporary value
-        input_size = {m_dynamic_detection_input_level_, m_dynamic_detection_input_level_};
-        model.Config().set<std::vector<int>>("input_size", input_size);
-    } else {
-        input_size = model.Config().get<std::vector<int>>("input_size");
-    }
-    bool dym = true;
+    input_size = model.Config().get<std::vector<int>>("input_size");
     m_face_detector_ = std::make_shared<FaceDetect>(input_size[0]);
-    auto ret = m_face_detector_->loadData(model, model.modelType, dym);
+    auto ret = m_face_detector_->loadData(model, model.modelType, false);
     if (ret != InferenceHelper::kRetOk) {
         return HERR_ARCHIVE_LOAD_FAILURE;
     }
@@ -499,5 +486,41 @@ void FaceTrack::SetTrackPreviewSize(int preview_size) {
         track_preview_size_ = preview_size;
 }
 
+/**
+ * Resolve the requested detector input level to a detection model scheme name.
+ * -1 selects the default "face_detect_320"; 160/320/640 map to their own scheme; any
+ * other value falls back to the closest supported size (the smaller one on a tie)
+ * and a warning is logged.
+ */
+std::string FaceTrack::ChoiceMultiLevelDetectModel(const int32_t pixel_size) {
+    const int32_t supported_sizes[] = {160, 320, 640};
+    const std::string scheme_names[] = {"face_detect_160", "face_detect_320", "face_detect_640"};
+    const int32_t num_sizes = sizeof(supported_sizes) / sizeof(supported_sizes[0]);
+
+    // -1 means "use the default level".
+    if (pixel_size == -1) {
+        return scheme_names[1];
+    }
+
+    // Single pass: an exact match has distance 0 and is therefore also the closest.
+    // Distances are computed in 64 bits so extreme inputs cannot overflow.
+    int32_t best = 0;
+    int64_t min_diff = -1;
+    for (int32_t i = 0; i < num_sizes; ++i) {
+        const int64_t delta = static_cast<int64_t>(pixel_size) - supported_sizes[i];
+        const int64_t diff = delta < 0 ? -delta : delta;
+        if (min_diff < 0 || diff < min_diff) {
+            min_diff = diff;
+            best = i;
+        }
+    }
+
+    if (min_diff != 0) {
+        INSPIRE_LOGW("Input pixel size %d is not supported. Choosing the closest scheme: %s for size %d.",
+                pixel_size, scheme_names[best].c_str(), supported_sizes[best]);
+    }
+
+    return scheme_names[best];
+}
 
 }   // namespace hyper
diff --git a/cpp/inspireface/track_module/face_track.h b/cpp/inspireface/track_module/face_track.h
@@ -138,6 +138,13 @@ class INSPIRE_API FaceTrack {
      */
     int InitFacePoseModel(InspireModel& model);
 
+    /**
+     * @brief Select the detection model scheme to be used according to the input pixel level.
+     * @param pixel_size Currently, only 160, 320, and 640 pixel sizes are supported.
+     * @return The corresponding scheme name; one of "face_detect_160", "face_detect_320" or "face_detect_640".
+     */
+    std::string ChoiceMultiLevelDetectModel(const int32_t pixel_size);
+    
 public:
 
     /**
diff --git a/cpp/inspireface/version.txt b/cpp/inspireface/version.txt
@@ -1 +1 @@
-InspireFace Version: 1.1.1
+InspireFace Version: 1.1.2
diff --git a/cpp/sample/CMakeLists.txt b/cpp/sample/CMakeLists.txt
@@ -6,7 +6,7 @@ option(ISF_BUILD_SAMPLE_CLUTTERED "Whether to compile the cluttered sample progr
 include_directories(${SRC_DIR})
 
 if (ISF_ENABLE_RKNN)
-    set(ISF_RKNN_API_LIB ${ISF_THIRD_PARTY_DIR}/${ISF_RKNPU_MAJOR}/runtime/${ISF_RK_DEVICE_TYPE}/Linux/librknn_api/${CPU_ARCH}/)
+        set(ISF_RKNN_API_LIB ${ISF_THIRD_PARTY_DIR}/inspireface-precompile/rknn/${ISF_RKNPU_MAJOR}/runtime/${ISF_RK_DEVICE_TYPE}/Linux/librknn_api/${CPU_ARCH}/)
     link_directories(${ISF_RKNN_API_LIB})
     set(ext rknn_api dl)
 endif ()
@@ -38,12 +38,16 @@ set_target_properties(MTFaceTrackSample PROPERTIES
         RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/sample/"
 )
 
-# Examples of face detection and tracking
-add_executable(FaceTrackVideoSample cpp/sample_face_track_video.cpp)
-target_link_libraries(FaceTrackVideoSample InspireFace ${ext})
-set_target_properties(FaceTrackVideoSample PROPERTIES
-        RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/sample/"
-)
+if(NOT DISABLE_GUI)
+        # Examples of face detection and tracking
+        add_executable(FaceTrackVideoSample cpp/sample_face_track_video.cpp)
+        target_link_libraries(FaceTrackVideoSample InspireFace ${ext})
+        set_target_properties(FaceTrackVideoSample PROPERTIES
+                RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/sample/"
+        )
+endif()
+
+
 
 # Examples of face recognition
 add_executable(FaceRecognitionSample cpp/sample_face_recognition.cpp)
diff --git a/cpp/sample/cpp/face_detect.cpp b/cpp/sample/cpp/face_detect.cpp
@@ -4,7 +4,9 @@
 #include <cstddef>
 #include <iostream>
 #include <opencv2/core/types.hpp>
+#ifndef DISABLE_GUI
 #include <opencv2/highgui.hpp>
+#endif
 #include <opencv2/imgproc.hpp>
 #include <vector>
 #include "data_type.h"
@@ -38,10 +40,10 @@ int main() {
         auto &item = results[i];
         cv::rectangle(img, cv::Point2f(item.x1, item.y1), cv::Point2f(item.x2, item.y2), cv::Scalar(0, 0, 255), 4);
     }
-
+#ifndef DISABLE_GUI
     cv::imshow("w", img);
     cv::waitKey(0);
-
+#endif
 
     return 0;
 }
diff --git a/cpp/sample/cpp/sample_face_track.cpp b/cpp/sample/cpp/sample_face_track.cpp
@@ -31,9 +31,9 @@ int main(int argc, char* argv[]) {
     // Non-video or frame sequence mode uses IMAGE-MODE, which is always face detection without tracking
     HFDetectMode detMode = HF_DETECT_MODE_ALWAYS_DETECT;
     // Maximum number of faces detected
-    HInt32 maxDetectNum = 5;
+    HInt32 maxDetectNum = 20;
     // Face detection image input level
-    HInt32 detectPixelLevel = 640;
+    HInt32 detectPixelLevel = 160;
     // Handle of the current face SDK algorithm context
     HFSession session = {0};
     ret = HFCreateInspireFaceSessionOptional(option, detMode, maxDetectNum, detectPixelLevel, -1, &session);
@@ -42,7 +42,7 @@ int main(int argc, char* argv[]) {
         return ret;
     }
 
-    HFSessionSetTrackPreviewSize(session, 640);
+    HFSessionSetTrackPreviewSize(session, detectPixelLevel);
     HFSessionSetFilterMinimumFacePixelSize(session, 32);
     
     // Load a image
diff --git a/cpp/sample/cpp/sample_face_track_cost.cpp b/cpp/sample/cpp/sample_face_track_cost.cpp
@@ -29,12 +29,12 @@ int main(int argc, char* argv[]) {
     // Enable the functions in the pipeline: mask detection, live detection, and face quality detection
     HOption option = HF_ENABLE_QUALITY | HF_ENABLE_MASK_DETECT | HF_ENABLE_LIVENESS;
     // Non-video or frame sequence mode uses IMAGE-MODE, which is always face detection without tracking
-    HFDetectMode detMode = HF_DETECT_MODE_LIGHT_TRACK;
+    HFDetectMode detMode = HF_DETECT_MODE_ALWAYS_DETECT;
     // Maximum number of faces detected
     HInt32 maxDetectNum = 50;
     // Handle of the current face SDK algorithm context
     HFSession session = {0};
-    ret = HFCreateInspireFaceSessionOptional(option, detMode, maxDetectNum, -1, -1, &session);
+    ret = HFCreateInspireFaceSessionOptional(option, detMode, maxDetectNum, 160, -1, &session);
     if (ret != HSUCCEED) {
         std::cout << "Create FaceContext error: " << ret << std::endl;
         return ret;
diff --git a/cpp/sample/cpp/sample_face_track_video.cpp b/cpp/sample/cpp/sample_face_track_video.cpp
@@ -48,9 +48,9 @@ int main(int argc, char* argv[]) {
     // Video or frame sequence mode uses VIDEO-MODE, which is face detection with tracking
     HFDetectMode detMode = HF_DETECT_MODE_TRACK_BY_DETECTION;
     // Maximum number of faces detected
-    HInt32 maxDetectNum = 5;
+    HInt32 maxDetectNum = 20;
     // Face detection image input level
-    HInt32 detectPixelLevel = 640;
+    HInt32 detectPixelLevel = 320;
     // fps in tracking-by-detection mode
     HInt32 trackByDetectFps = 20;
     HFSession session = {0};
@@ -61,8 +61,8 @@ int main(int argc, char* argv[]) {
         return ret;
     }
 
-    HFSessionSetTrackPreviewSize(session, 640);
-    HFSessionSetFilterMinimumFacePixelSize(session, 32);
+    HFSessionSetTrackPreviewSize(session, detectPixelLevel);
+    HFSessionSetFilterMinimumFacePixelSize(session, 0);
 
     // Open the video file
     cv::VideoCapture cap(videoPath);
diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt
@@ -20,7 +20,7 @@ endif ()
 
 if (ISF_ENABLE_RKNN)
     set(DEPEND rknn_api dl)
-    set(ISF_RKNN_API_LIB ${ISF_THIRD_PARTY_DIR}/${ISF_RKNPU_MAJOR}/runtime/${ISF_RK_DEVICE_TYPE}/Linux/librknn_api/${CPU_ARCH}/)
+    set(ISF_RKNN_API_LIB ${ISF_THIRD_PARTY_DIR}/inspireface-precompile/rknn/${ISF_RKNPU_MAJOR}/runtime/${ISF_RK_DEVICE_TYPE}/Linux/librknn_api/${CPU_ARCH}/)
     message("Enable RKNN Inference")
     link_directories(${ISF_RKNN_API_LIB})
     set(DEPEND rknn_api dl)
diff --git a/cpp/test/settings/enviro.h b/cpp/test/settings/enviro.h
@@ -18,6 +18,7 @@ class Enviro {
     void operator=(Enviro const&) = delete;
 
     std::string getPackName() const { return packName; }
+    
     void setPackName(const std::string& name) { packName = name; }
 
     const std::string &getTestResDir() const { return testResDir; }
diff --git a/cpp/test/test.cpp b/cpp/test/test.cpp
diff --git a/cpp/test/unit/api/test_face_track.cpp b/cpp/test/unit/api/test_face_track.cpp
diff --git a/cpp/test/unit/api/test_feature_manage.cpp b/cpp/test/unit/api/test_feature_manage.cpp
diff --git a/doc/Benchmark-Remark(Updating).md b/doc/Benchmark-Remark(Updating).md

Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-InspireFace Version: 1.1.1`
	`1`	`+InspireFace Version: 1.1.2`