add facial-landmarks-98-detection model to gaze_estimation_demo (#2981)

VoronovaIntern · vladimir-dudnik · web-flow · commit e8c9717d0828 · 2021-12-22T11:20:28.000+03:00
* add 98 model to gaze_estimation_demo

* delete mistakes

* comment new model

* solved bias

* solve mistakes

* fix style; fix crash when no eyes, or only one eye, are visible/open

Co-authored-by: Vladimir Dudnik <vladimir.dudnik@intel.com>
diff --git a/demos/gaze_estimation_demo/cpp/README.md b/demos/gaze_estimation_demo/cpp/README.md
@@ -8,7 +8,7 @@ The demo also relies on the following auxiliary networks:
 
 * `face-detection-retail-0004` or `face-detection-adas-0001` detection networks for finding faces
 * `head-pose-estimation-adas-0001`, which estimates head pose in Tait-Bryan angles, serving as an input for gaze estimation model
-* `facial-landmarks-35-adas-0002`, which estimates coordinates of facial landmarks for detected faces. The keypoints at the corners of eyes are used to locate eyes regions required for the gaze estimation model
+* `facial-landmarks-35-adas-0002` or `facial-landmarks-98-detection-0001`, which estimates coordinates of facial landmarks for detected faces. The keypoints at the corners of eyes are used to locate eyes regions required for the gaze estimation model
 * `open-closed-eye-0001`, which estimates eyes state of detected faces.
 
 Other demo objectives are:
@@ -47,6 +47,7 @@ omz_converter --list models.lst
 ### Supported Models
 
 * facial-landmarks-35-adas-0002
+* facial-landmarks-98-detection-0001
 * face-detection-adas-0001
 * face-detection-retail-0004
 * face-detection-retail-0005
diff --git a/demos/gaze_estimation_demo/cpp/gaze_estimation_demo.hpp b/demos/gaze_estimation_demo/cpp/gaze_estimation_demo.hpp
@@ -7,10 +7,10 @@
 
 #include <string>
 #include <vector>
-#include <gflags/gflags.h>
 #include <iostream>
 
-#include <utils/default_flags.hpp>
+#include "gflags/gflags.h"
+#include "utils/default_flags.hpp"
 
 DEFINE_INPUT_FLAGS
 DEFINE_OUTPUT_FLAGS
diff --git a/demos/gaze_estimation_demo/cpp/include/face_inference_results.hpp b/demos/gaze_estimation_demo/cpp/include/face_inference_results.hpp
@@ -6,7 +6,7 @@
 
 #include <vector>
 #include <opencv2/core/core.hpp>
-#include <utils/slog.hpp>
+#include "utils/slog.hpp"
 
 namespace gaze_estimation {
 struct FaceInferenceResults {
@@ -25,6 +25,8 @@ struct FaceInferenceResults {
 
     cv::Point3f gazeVector;
 
+    std::vector<cv::Point2f> getEyeLandmarks();
+
     friend slog::LogStream& operator<<(slog::LogStream& os, const FaceInferenceResults& faceInferenceResults);
 };
 
diff --git a/demos/gaze_estimation_demo/cpp/include/ie_wrapper.hpp b/demos/gaze_estimation_demo/cpp/include/ie_wrapper.hpp
@@ -6,13 +6,12 @@
 
 #include <cstdio>
 #include <string>
-
 #include <map>
 #include <vector>
 
-#include <utils/common.hpp>
-#include <utils/ocv_common.hpp>
-#include <utils/slog.hpp>
+#include "utils/common.hpp"
+#include "utils/ocv_common.hpp"
+#include "utils/slog.hpp"
 
 namespace gaze_estimation {
 class IEWrapper {
diff --git a/demos/gaze_estimation_demo/cpp/include/landmarks_estimator.hpp b/demos/gaze_estimation_demo/cpp/include/landmarks_estimator.hpp
@@ -27,5 +27,15 @@ class LandmarksEstimator: public BaseEstimator {
 private:
     IEWrapper ieWrapper;
     std::string inputBlobName, outputBlobName;
+    size_t numberLandmarks;
+    std::vector<cv::Point2i> simplePostprocess(cv::Rect faceBoundingBox, cv::Mat faceCrop);
+    std::vector<cv::Point2i> heatMapPostprocess(cv::Rect faceBoundingBox, cv::Mat faceCrop);
+    std::vector<cv::Mat> split(std::vector<float>& data, const std::vector<unsigned long>& shape);
+    std::vector<cv::Point2f> getMaxPreds(std::vector<cv::Mat> heatMaps);
+    int sign(float number);
+    cv::Mat affineTransform(cv::Point2f center, cv::Point2f scale,
+        float rot, size_t dst_w, size_t dst_h, cv::Point2f shift, bool inv);
+    cv::Point2f rotatePoint(cv::Point2f pt, float angle_rad);
+    cv::Point2f get3rdPoint(cv::Point2f a, cv::Point2f b);
 };
 }  // namespace gaze_estimation
diff --git a/demos/gaze_estimation_demo/cpp/include/utils.hpp b/demos/gaze_estimation_demo/cpp/include/utils.hpp
@@ -6,15 +6,14 @@
 
 #include <cstdio>
 #include <string>
-
 #include <utility>
 #include <map>
 #include <vector>
 
 #include <inference_engine.hpp>
 
-#include <utils/ocv_common.hpp>
-#include <utils/slog.hpp>
+#include "utils/ocv_common.hpp"
+#include "utils/slog.hpp"
 
 namespace gaze_estimation {
 void gazeVectorToGazeAngles(const cv::Point3f& gazeVector, cv::Point2f& gazeAngles);
diff --git a/demos/gaze_estimation_demo/cpp/main.cpp b/demos/gaze_estimation_demo/cpp/main.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2021 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -7,7 +7,6 @@
 * \file gaze_estimation_demo/main.cpp
 * \example gaze_estimation_demo/main.cpp
 */
-#include <gflags/gflags.h>
 #include <functional>
 #include <iostream>
 #include <fstream>
@@ -27,33 +26,30 @@
 
 #include <inference_engine.hpp>
 
+#include <gflags/gflags.h>
 #include <monitors/presenter.h>
 #include <utils/args_helper.hpp>
 #include <utils/images_capture.h>
 #include <utils/ocv_common.hpp>
 #include <utils/performance_metrics.hpp>
 #include <utils/slog.hpp>
 
-#include "gaze_estimation_demo.hpp"
-
 #include "face_inference_results.hpp"
-
 #include "face_detector.hpp"
-
 #include "base_estimator.hpp"
 #include "head_pose_estimator.hpp"
 #include "landmarks_estimator.hpp"
 #include "eye_state_estimator.hpp"
 #include "gaze_estimator.hpp"
-
 #include "results_marker.hpp"
-
 #include "utils.hpp"
 
+#include "gaze_estimation_demo.hpp"
+
 using namespace gaze_estimation;
 
-bool ParseAndCheckCommandLine(int argc, char *argv[]) {
-    // ---------------------------Parsing and validating input arguments--------------------------------------
+bool ParseAndCheckCommandLine(int argc, char* argv[]) {
+    // Parsing and validating input arguments
     gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true);
     if (FLAGS_h) {
         showUsage();
@@ -78,11 +74,11 @@ bool ParseAndCheckCommandLine(int argc, char *argv[]) {
 }
 
 
-int main(int argc, char *argv[]) {
+int main(int argc, char* argv[]) {
     try {
         PerformanceMetrics metrics;
 
-        // ------------------------------ Parsing and validating of input arguments --------------------------
+        // Parsing and validating of input arguments
         if (!ParseAndCheckCommandLine(argc, argv)) {
             return 0;
         }
@@ -108,8 +104,8 @@ int main(int argc, char *argv[]) {
         int delay = 1;
         std::string windowName = "Gaze estimation demo";
 
-        std::unique_ptr<ImagesCapture> cap = openImagesCapture(FLAGS_i, FLAGS_loop, 0,
-            std::numeric_limits<size_t>::max(), stringToSize(FLAGS_res));
+        std::unique_ptr<ImagesCapture> cap = openImagesCapture(
+            FLAGS_i, FLAGS_loop, 0, std::numeric_limits<size_t>::max(), stringToSize(FLAGS_res));
 
         auto startTime = std::chrono::steady_clock::now();
         cv::Mat frame = cap->read();
@@ -118,8 +114,9 @@ int main(int argc, char *argv[]) {
         }
 
         cv::VideoWriter videoWriter;
-        if (!FLAGS_o.empty() && !videoWriter.open(FLAGS_o, cv::VideoWriter::fourcc('M', 'J', 'P', 'G'),
-                                                  cap->fps(), frame.size())) {
+        if (!FLAGS_o.empty() &&
+            !videoWriter.open(FLAGS_o, cv::VideoWriter::fourcc('M', 'J', 'P', 'G'), cap->fps(), frame.size()))
+        {
             throw std::runtime_error("Can't open video writer");
         }
         uint32_t framesProcessed = 0;
diff --git a/demos/gaze_estimation_demo/cpp/models.lst b/demos/gaze_estimation_demo/cpp/models.lst
@@ -1,4 +1,5 @@
 # This file can be used with the --list option of the model downloader.
+#facial-landmarks-98-detection-????
 facial-landmarks-35-adas-????
 face-detection-adas-????
 face-detection-retail-????
diff --git a/demos/gaze_estimation_demo/cpp/src/eye_state_estimator.cpp b/demos/gaze_estimation_demo/cpp/src/eye_state_estimator.cpp
@@ -9,18 +9,17 @@
 
 namespace gaze_estimation {
 
-EyeStateEstimator::EyeStateEstimator(InferenceEngine::Core& ie,
-                                     const std::string& modelPath,
-                                     const std::string& deviceName):
-                                     ieWrapper(ie, modelPath, modelType, deviceName) {
+EyeStateEstimator::EyeStateEstimator(
+    InferenceEngine::Core& ie, const std::string& modelPath, const std::string& deviceName) :
+        ieWrapper(ie, modelPath, modelType, deviceName)
+{
     inputBlobName = ieWrapper.expectSingleInput();
     ieWrapper.expectImageInput(inputBlobName);
     outputBlobName = ieWrapper.expectSingleOutput();
 }
 
-cv::Rect EyeStateEstimator::createEyeBoundingBox(const cv::Point2i& p1,
-                                                 const cv::Point2i& p2,
-                                                 float scale) const {
+cv::Rect EyeStateEstimator::createEyeBoundingBox(
+    const cv::Point2i& p1, const cv::Point2i& p2, float scale) const {
     cv::Rect result;
     float size = static_cast<float>(cv::norm(p1 - p2));
 
@@ -35,9 +34,8 @@ cv::Rect EyeStateEstimator::createEyeBoundingBox(const cv::Point2i& p1,
     return result;
 }
 
-void EyeStateEstimator::rotateImageAroundCenter(const cv::Mat& srcImage,
-                                                cv::Mat& dstImage,
-                                                float angle) const {
+void EyeStateEstimator::rotateImageAroundCenter(
+    const cv::Mat& srcImage, cv::Mat& dstImage, float angle) const {
     auto w = srcImage.cols;
     auto h = srcImage.rows;
 
@@ -49,11 +47,13 @@ void EyeStateEstimator::rotateImageAroundCenter(const cv::Mat& srcImage,
     cv::warpAffine(srcImage, dstImage, rotMatrix, size, 1, cv::BORDER_REPLICATE);
 }
 
-void EyeStateEstimator::estimate(const cv::Mat& image, FaceInferenceResults& outputResults) {
+void EyeStateEstimator::estimate(
+    const cv::Mat& image, FaceInferenceResults& outputResults) {
     auto roll = outputResults.headPoseAngles.z;
+    std::vector<cv::Point2f> eyeLandmarks = outputResults.getEyeLandmarks();
 
-    outputResults.leftEyeMidpoint = (outputResults.faceLandmarks[0] + outputResults.faceLandmarks[1]) / 2;
-    auto leftEyeBoundingBox = createEyeBoundingBox(outputResults.faceLandmarks[0], outputResults.faceLandmarks[1]);
+    outputResults.leftEyeMidpoint = (eyeLandmarks[0] + eyeLandmarks[1]) / 2;
+    auto leftEyeBoundingBox = createEyeBoundingBox(eyeLandmarks[0], eyeLandmarks[1]);
     outputResults.leftEyeBoundingBox = leftEyeBoundingBox;
     if (leftEyeBoundingBox.area()) {
         auto leftEyeImage(cv::Mat(image, leftEyeBoundingBox));
@@ -70,8 +70,8 @@ void EyeStateEstimator::estimate(const cv::Mat& image, FaceInferenceResults& out
         outputResults.leftEyeState = false;
     }
 
-    outputResults.rightEyeMidpoint = (outputResults.faceLandmarks[2] + outputResults.faceLandmarks[3]) / 2;
-    auto rightEyeBoundingBox = createEyeBoundingBox(outputResults.faceLandmarks[2], outputResults.faceLandmarks[3]);
+    outputResults.rightEyeMidpoint = (eyeLandmarks[2] + eyeLandmarks[3]) / 2;
+    auto rightEyeBoundingBox = createEyeBoundingBox(eyeLandmarks[2], eyeLandmarks[3]);
     outputResults.rightEyeBoundingBox = rightEyeBoundingBox;
     if (rightEyeBoundingBox.area()) {
         auto rightEyeImage(cv::Mat(image, rightEyeBoundingBox));
diff --git a/demos/gaze_estimation_demo/cpp/src/face_detector.cpp b/demos/gaze_estimation_demo/cpp/src/face_detector.cpp
@@ -4,21 +4,20 @@
 
 #include <cstdio>
 #include <string>
-
 #include <vector>
 #include <map>
 
 #include "face_detector.hpp"
 
 namespace gaze_estimation {
-FaceDetector::FaceDetector(InferenceEngine::Core& ie,
-                           const std::string& modelPath,
-                           const std::string& deviceName,
-                           double detectionConfidenceThreshold,
-                           bool enableReshape):
-             ieWrapper(ie, modelPath, modelType, deviceName),
-             detectionThreshold(detectionConfidenceThreshold),
-             enableReshape(enableReshape) {
+
+FaceDetector::FaceDetector(
+    InferenceEngine::Core& ie, const std::string& modelPath, const std::string& deviceName,
+    double detectionConfidenceThreshold, bool enableReshape) :
+        ieWrapper(ie, modelPath, modelType, deviceName),
+        detectionThreshold(detectionConfidenceThreshold),
+        enableReshape(enableReshape)
+{
     const auto& inputInfo = ieWrapper.getInputBlobDimsInfo();
 
     inputBlobName = ieWrapper.expectSingleInput();
diff --git a/demos/gaze_estimation_demo/cpp/src/face_inference_results.cpp b/demos/gaze_estimation_demo/cpp/src/face_inference_results.cpp
@@ -27,4 +27,24 @@ slog::LogStream& operator<<(slog::LogStream& os, const FaceInferenceResults& fac
 
     return os;
 }
+
+std::vector<cv::Point2f> FaceInferenceResults::getEyeLandmarks() {
+    std::vector<cv::Point2f> result(4);
+    if (faceLandmarks.size() == 35) {
+        result[0] = faceLandmarks[0];
+        result[1] = faceLandmarks[1];
+        result[2] = faceLandmarks[2];
+        result[3] = faceLandmarks[3];
+    }
+    else if (faceLandmarks.size() == 98) {
+        result[0] = faceLandmarks[60];
+        result[1] = faceLandmarks[64];
+        result[2] = faceLandmarks[68];
+        result[3] = faceLandmarks[72];
+    }
+    else {
+        throw std::runtime_error("the network must output 35 or 98 points");
+    }
+    return result;
+}
 }  // namespace gaze_estimation
diff --git a/demos/gaze_estimation_demo/cpp/src/gaze_estimator.cpp b/demos/gaze_estimation_demo/cpp/src/gaze_estimator.cpp
@@ -14,11 +14,10 @@ const char BLOB_HEAD_POSE_ANGLES[] = "head_pose_angles";
 const char BLOB_LEFT_EYE_IMAGE[] = "left_eye_image";
 const char BLOB_RIGHT_EYE_IMAGE[] = "right_eye_image";
 
-GazeEstimator::GazeEstimator(InferenceEngine::Core& ie,
-                             const std::string& modelPath,
-                             const std::string& deviceName,
-                             bool doRollAlign):
-               ieWrapper(ie, modelPath, modelType, deviceName), rollAlign(doRollAlign) {
+GazeEstimator::GazeEstimator(
+    InferenceEngine::Core& ie, const std::string& modelPath, const std::string& deviceName, bool doRollAlign) :
+        ieWrapper(ie, modelPath, modelType, deviceName), rollAlign(doRollAlign)
+{
     const auto& inputInfo = ieWrapper.getInputBlobDimsInfo();
 
     for (const auto& blobName: {BLOB_HEAD_POSE_ANGLES, BLOB_LEFT_EYE_IMAGE, BLOB_RIGHT_EYE_IMAGE}) {
@@ -47,9 +46,7 @@ GazeEstimator::GazeEstimator(InferenceEngine::Core& ie,
     expectAngles(outputBlobName, outputInfo.at(outputBlobName));
 }
 
-void GazeEstimator::rotateImageAroundCenter(const cv::Mat& srcImage,
-                                              cv::Mat& dstImage,
-                                              float angle) const {
+void GazeEstimator::rotateImageAroundCenter(const cv::Mat& srcImage, cv::Mat& dstImage, float angle) const {
     auto w = srcImage.cols;
     auto h = srcImage.rows;
 
@@ -61,9 +58,8 @@ void GazeEstimator::rotateImageAroundCenter(const cv::Mat& srcImage,
     cv::warpAffine(srcImage, dstImage, rotMatrix, size, 1, cv::BORDER_REPLICATE);
 }
 
-void GazeEstimator::estimate(const cv::Mat& image,
-                             FaceInferenceResults& outputResults) {
-    if (!outputResults.leftEyeState && !outputResults.rightEyeState)
+void GazeEstimator::estimate(const cv::Mat& image, FaceInferenceResults& outputResults) {
+    if (!outputResults.leftEyeState || !outputResults.rightEyeState)
         return;
     std::vector<float> headPoseAngles(3);
     auto roll = outputResults.headPoseAngles.z;
diff --git a/demos/gaze_estimation_demo/cpp/src/head_pose_estimator.cpp b/demos/gaze_estimation_demo/cpp/src/head_pose_estimator.cpp
@@ -15,10 +15,10 @@ const std::pair<const char*, float cv::Point3f::*> OUTPUTS[] = {
     {"angle_r_fc", &cv::Point3f::z},
 };
 
-HeadPoseEstimator::HeadPoseEstimator(InferenceEngine::Core& ie,
-                                     const std::string& modelPath,
-                                     const std::string& deviceName):
-                   ieWrapper(ie, modelPath, modelType, deviceName) {
+HeadPoseEstimator::HeadPoseEstimator(
+    InferenceEngine::Core& ie, const std::string& modelPath, const std::string& deviceName) :
+        ieWrapper(ie, modelPath, modelType, deviceName)
+{
     inputBlobName = ieWrapper.expectSingleInput();
     ieWrapper.expectImageInput(inputBlobName);
 
@@ -39,8 +39,7 @@ HeadPoseEstimator::HeadPoseEstimator(InferenceEngine::Core& ie,
     }
 }
 
-void HeadPoseEstimator::estimate(const cv::Mat& image,
-                                 FaceInferenceResults& outputResults) {
+void HeadPoseEstimator::estimate(const cv::Mat& image, FaceInferenceResults& outputResults) {
     auto faceBoundingBox = outputResults.faceBoundingBox;
     auto faceCrop(cv::Mat(image, faceBoundingBox));
 
diff --git a/demos/gaze_estimation_demo/cpp/src/ie_wrapper.cpp b/demos/gaze_estimation_demo/cpp/src/ie_wrapper.cpp
diff --git a/demos/gaze_estimation_demo/cpp/src/landmarks_estimator.cpp b/demos/gaze_estimation_demo/cpp/src/landmarks_estimator.cpp
diff --git a/demos/gaze_estimation_demo/cpp/src/results_marker.cpp b/demos/gaze_estimation_demo/cpp/src/results_marker.cpp

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,5 @@`
`1`	`1`	`# This file can be used with the --list option of the model downloader.`
	`2`	`+#facial-landmarks-98-detection-????`
`2`	`3`	`facial-landmarks-35-adas-????`
`3`	`4`	`face-detection-adas-????`
`4`	`5`	`face-detection-retail-????`