add: yolo_obstacle_detector pipeline

qilinhu · qilinhu · commit edc1ad5f8a42 · 2023-02-07T10:28:20.000+08:00
Change-Id: I2b776ad4e61c4f78214408ad36cf6f7ae9748899
diff --git a/modules/perception/camera/lib/obstacle/detector/yolo/yolo_obstacle_detector.cc b/modules/perception/camera/lib/obstacle/detector/yolo/yolo_obstacle_detector.cc
@@ -14,6 +14,7 @@
  * limitations under the License.
  *****************************************************************************/
 #include "modules/perception/camera/lib/obstacle/detector/yolo/yolo_obstacle_detector.h"
+#include <boost/algorithm/string.hpp>
 
 #include "cyber/common/file.h"
 #include "cyber/common/log.h"
@@ -37,24 +38,30 @@ void YoloObstacleDetector::LoadInputShape(const yolo::ModelParam &model_param) {
   int resized_width = model_param.resized_width();
   int aligned_pixel = model_param.aligned_pixel();
   // inference input shape
-  int image_height = static_cast<int>(base_camera_model_->get_height());
-  int image_width = static_cast<int>(base_camera_model_->get_width());
-
-  offset_y_ =
-      static_cast<int>(offset_ratio * static_cast<float>(image_height) + .5f);
-  float roi_ratio = cropped_ratio * static_cast<float>(image_height) /
-                    static_cast<float>(image_width);
-  width_ = static_cast<int>(resized_width + aligned_pixel / 2) / aligned_pixel *
-           aligned_pixel;
-  height_ = static_cast<int>(static_cast<float>(width_) * roi_ratio +
-                             static_cast<float>(aligned_pixel) / 2.0f) /
-            aligned_pixel * aligned_pixel;
-
-  AINFO << "image_height=" << image_height << ", "
-        << "image_width=" << image_width << ", "
-        << "roi_ratio=" << roi_ratio;
-  AINFO << "offset_y=" << offset_y_ << ", height=" << height_
-        << ", width=" << width_;
+  // TODO: optimize; width_ and height_ only keep the last camera's values.
+  for (size_t i = 0; i < camera_names_.size(); ++i) {
+    base_camera_model_ = 
+        name_basemodel_map_.at(camera_names_[i]);
+    int image_height = static_cast<int>(base_camera_model_->get_height());
+    int image_width = static_cast<int>(base_camera_model_->get_width());
+
+    offset_y_ =
+        static_cast<int>(offset_ratio * static_cast<float>(image_height) + .5f);
+    float roi_ratio = cropped_ratio * static_cast<float>(image_height) /
+                      static_cast<float>(image_width);
+    width_ = static_cast<int>(resized_width + aligned_pixel / 2) / aligned_pixel *
+            aligned_pixel;
+    height_ = static_cast<int>(static_cast<float>(width_) * roi_ratio +
+                              static_cast<float>(aligned_pixel) / 2.0f) /
+              aligned_pixel * aligned_pixel;
+    offset_y_map_.insert(
+        std::pair<std::string, int>(camera_names_[i], offset_y_));  
+    AINFO << "image_height=" << image_height << ", "
+          << "image_width=" << image_width << ", "
+          << "roi_ratio=" << roi_ratio;
+    AINFO << "offset_y=" << offset_y_ << ", height=" << height_
+          << ", width=" << width_;
+  } 
 }
 
 void YoloObstacleDetector::LoadParam(const yolo::YoloParam &yolo_param) {
@@ -300,17 +307,37 @@ bool YoloObstacleDetector::Init(const StageConfig& stage_config) {
     return false;
   }
 
-  yolo_obstacle_detector_config_ = stage_config.yolo_obstacle_detector_config();
+  ACHECK(stage_config.has_camera_detector_config());
+  auto yolo_obstacle_detector_config_ =
+      stage_config.camera_detector_config();
+
   gpu_id_ = yolo_obstacle_detector_config_.gpu_id();
   BASE_CUDA_CHECK(cudaSetDevice(gpu_id_));
   BASE_CUDA_CHECK(cudaStreamCreate(&stream_));
 
-  base_camera_model_ =
-      common::SensorManager::Instance()->GetUndistortCameraModel(
-          yolo_obstacle_detector_config_.camera_name());
-  ACHECK(base_camera_model_ != nullptr) << "base_camera_model is nullptr!";
+  std::string camera_name =
+          yolo_obstacle_detector_config_.camera_name();
+  boost::algorithm::split(camera_names_, camera_name,
+                              boost::algorithm::is_any_of(","));
+                              
+  for (size_t i = 0; i < camera_names_.size(); ++i) {
+    std::shared_ptr<base::BaseCameraModel> base_model_ptr =
+        common::SensorManager::Instance()->GetUndistortCameraModel(
+            camera_names_[i]);
+    name_basemodel_map_.insert(
+        std::pair<std::string, std::shared_ptr<base::BaseCameraModel>>(
+            camera_names_[i], base_model_ptr));
+    ACHECK(base_model_ptr != nullptr) << "base_camera_model is nullptr!";
+  }
+
+  std::string config_path =
+      GetAbsolutePath(yolo_obstacle_detector_config_.root_dir(),
+                      yolo_obstacle_detector_config_.conf_file());
+  if (!cyber::common::GetProtoFromFile(config_path, &yolo_param_)) {
+    AERROR << "read proto_config fail";
+    return false;
+  }
 
-  yolo_param_ = yolo_obstacle_detector_config_.yolo_param();
   const auto &model_param = yolo_param_.model_param();
   // todo(zero): options.root_dir
   std::string root_dir = yolo_obstacle_detector_config_.root_dir();
@@ -347,6 +374,94 @@ bool YoloObstacleDetector::Init(const StageConfig& stage_config) {
 }
 
+// Runs one YOLO detection pass on data_frame->camera_frame: crops the image
+// at the per-camera offset_y_, resizes it into the network input blob, runs
+// inference and post-processes the boxes in frame->detected_objects.
+// Returns false if the frame is null, the CUDA device cannot be selected, or
+// the frame's sensor has no entry in the per-camera maps; true otherwise.
 bool YoloObstacleDetector::Process(DataFrame* data_frame) {
+  if (data_frame == nullptr || data_frame->camera_frame == nullptr) {
+    AERROR << "data_frame or its camera_frame is nullptr";
+    return false;
+  }
+  auto frame = data_frame->camera_frame;
+
+  Timer timer;
+  if (cudaSetDevice(gpu_id_) != cudaSuccess) {
+    AERROR << "Failed to set device to " << gpu_id_;
+    return false;
+  }
+
+  auto input_blob = inference_->get_blob(yolo_param_.net_param().input_blob());
+  AINFO << "Start: " << static_cast<double>(timer.Toc()) * 0.001 << "ms";
+
+  // Use find() rather than map::at(): at() throws std::out_of_range when the
+  // frame comes from a camera that was not listed in camera_name.
+  const std::string &sensor_name = frame->data_provider->sensor_name();
+  const auto model_it = name_basemodel_map_.find(sensor_name);
+  const auto offset_it = offset_y_map_.find(sensor_name);
+  if (model_it == name_basemodel_map_.end() ||
+      offset_it == offset_y_map_.end()) {
+    AERROR << "No camera model registered for sensor: " << sensor_name;
+    return false;
+  }
+  base_camera_model_ = model_it->second;
+  offset_y_ = offset_it->second;
+
+  DataProvider::ImageOptions image_options;
+  image_options.target_color = base::Color::BGR;
+  image_options.crop_roi = base::RectI(
+      0, offset_y_, static_cast<int>(base_camera_model_->get_width()),
+      static_cast<int>(base_camera_model_->get_height()) - offset_y_);
+  image_options.do_crop = true;
+  frame->data_provider->GetImage(image_options, image_.get());
+  AINFO << "GetImageBlob: " << static_cast<double>(timer.Toc()) * 0.001 << "ms";
+  inference::ResizeGPU(*image_, input_blob, frame->data_provider->src_width(),
+                       0);
+  AINFO << "Resize: " << static_cast<double>(timer.Toc()) * 0.001 << "ms";
+
+  /////////////////////////// detection part ///////////////////////////
+  inference_->Infer();
+  AINFO << "Network Forward: " << static_cast<double>(timer.Toc()) * 0.001
+        << "ms";
+  get_objects_cpu(yolo_blobs_, stream_, types_, nms_, yolo_param_.model_param(),
+                  light_vis_conf_threshold_, light_swt_conf_threshold_,
+                  overlapped_.get(), idx_sm_.get(), &(frame->detected_objects));
+
+  AINFO << "GetObj: " << static_cast<double>(timer.Toc()) * 0.001 << "ms";
+  filter_bbox(min_dims_, &(frame->detected_objects));
+  FeatureExtractorOptions feat_options;
+  feat_options.normalized = true;
+  AINFO << "Post1: " << static_cast<double>(timer.Toc()) * 0.001 << "ms";
+  feature_extractor_->Extract(feat_options, frame);
+  AINFO << "Extract: " << static_cast<double>(timer.Toc()) * 0.001 << "ms";
+  recover_bbox(frame->data_provider->src_width(),
+               frame->data_provider->src_height() - offset_y_, offset_y_,
+               &frame->detected_objects);
+
+  // post processing
+  int left_boundary =
+      static_cast<int>(border_ratio_ * static_cast<float>(image_->cols()));
+  int right_boundary = static_cast<int>((1.0f - border_ratio_) *
+                                        static_cast<float>(image_->cols()));
+  for (auto &obj : frame->detected_objects) {
+    // recover alpha
+    obj->camera_supplement.alpha /= ori_cycle_;
+    // get area_id from visible_ratios
+    if (yolo_param_.model_param().num_areas() == 0) {
+      obj->camera_supplement.area_id =
+          get_area_id(obj->camera_supplement.visible_ratios);
+    }
+    // clear cut off ratios
+    auto &box = obj->camera_supplement.box;
+    if (box.xmin >= left_boundary) {
+      obj->camera_supplement.cut_off_ratios[2] = 0;
+    }
+    if (box.xmax <= right_boundary) {
+      obj->camera_supplement.cut_off_ratios[3] = 0;
+    }
+  }
+  AINFO << "Post2: " << static_cast<double>(timer.Toc()) * 0.001 << "ms";
+
   return true;
 }
 
diff --git a/modules/perception/camera/lib/obstacle/detector/yolo/yolo_obstacle_detector.h b/modules/perception/camera/lib/obstacle/detector/yolo/yolo_obstacle_detector.h
@@ -71,13 +71,17 @@ class YoloObstacleDetector : public BaseObstacleDetector {
   bool InitFeatureExtractor(const std::string &root_dir);
 
  private:
+  std::map<std::string, std::shared_ptr<base::BaseCameraModel>>
+      name_basemodel_map_;
+  std::map<std::string, int> offset_y_map_;
   std::shared_ptr<BaseFeatureExtractor> feature_extractor_;
   yolo::YoloParam yolo_param_;
   std::shared_ptr<base::BaseCameraModel> base_camera_model_ = nullptr;
   std::shared_ptr<inference::Inference> inference_;
   std::vector<base::ObjectSubType> types_;
   std::vector<float> expands_;
   std::vector<float> anchors_;
+  std::vector<std::string> camera_names_;
 
   NMSParam nms_;
   cudaStream_t stream_ = nullptr;
diff --git a/modules/perception/pipeline/BUILD b/modules/perception/pipeline/BUILD
@@ -100,6 +100,7 @@ cc_library(
     "//modules/common/util:util_tool",
     "//modules/perception/camera/lib/obstacle/camera_detection_postprocessor",
     "//modules/perception/camera/lib/obstacle/detector/smoke:smoke_obstacle_detector",
+    "//modules/perception/camera/lib/obstacle/detector/yolo:yolo_obstacle_detector",
     "//modules/perception/camera/lib/obstacle/preprocessor:camera_detection_preprocessor",
     "//modules/perception/camera/lib/obstacle/tracker/omt:omt_obstacle_tracker",
     "//modules/perception/camera/lib/obstacle/tracker/omt2:omt_bev_tracker",
diff --git a/modules/perception/pipeline/config/camera_detection_pipeline.pb.txt b/modules/perception/pipeline/config/camera_detection_pipeline.pb.txt
@@ -1,5 +1,5 @@
 stage_type: OMT_OBSTACLE_TRACKER
-stage_type: SMOKE_OBSTACLE_DETECTION
+stage_type: YOLO_OBSTACLE_DETECTOR
 stage_type: OMT_OBSTACLE_TRACKER
 stage_type: MULTI_CUE_OBSTACLE_TRANSFORMER
 stage_type: LOCATION_REFINER_OBSTACLE_POSTPROCESSOR
@@ -89,15 +89,15 @@ stage_config: {
 }
 
 stage_config: {
-  stage_type: SMOKE_OBSTACLE_DETECTION
+  stage_type: YOLO_OBSTACLE_DETECTOR
   enabled: true
   type: "camera_detector"
 
   camera_detector_config {
     gpu_id: 0
     camera_name: "front_6mm,front_12mm"
     root_dir: "/apollo/modules/perception/production/data/perception/camera/models/yolo_obstacle_detector"
-    conf_file: "smoke-config.pt"
+    conf_file: "config.pt"
   }
 }
 
diff --git a/modules/perception/pipeline/pipeline.cc b/modules/perception/pipeline/pipeline.cc
@@ -22,6 +22,7 @@
 #include "modules/perception/camera/lib/obstacle/detector/bev_detection/bev_obstacle_detector.h"
 #include "modules/perception/camera/lib/obstacle/detector/caddn/caddn_obstacle_detector.h"
 #include "modules/perception/camera/lib/obstacle/detector/smoke/smoke_obstacle_detector.h"
+#include "modules/perception/camera/lib/obstacle/detector/yolo/yolo_obstacle_detector.h"
 #include "modules/perception/camera/lib/obstacle/postprocessor/location_refiner/location_refiner_obstacle_postprocessor.h"
 #include "modules/perception/camera/lib/obstacle/preprocessor/camera_detection_preprocessor.h"
 #include "modules/perception/camera/lib/obstacle/tracker/omt/omt_obstacle_tracker.h"
@@ -184,6 +185,9 @@ std::shared_ptr<Stage> Pipeline::CreateStage(const StageType& stage_type) {
     case StageType::SMOKE_OBSTACLE_DETECTION:
       stage_ptr.reset(new camera::SmokeObstacleDetector());
       break;
+    case StageType::YOLO_OBSTACLE_DETECTOR:
+      stage_ptr.reset(new camera::YoloObstacleDetector());
+      break;
     case StageType::CAMERA_DETECTION_PREPROCESSOR:
       stage_ptr.reset(new camera::CameraDetectionPreprocessor());
       break;
diff --git a/modules/perception/production/data/perception/camera/models/yolo_obstacle_detector/config.pt b/modules/perception/production/data/perception/camera/models/yolo_obstacle_detector/config.pt
@@ -0,0 +1,39 @@
+model_param {
+  model_name: "3d-r4-half_caffe"
+  model_type: "RTNetInt8"
+  weight_file: "deploy.model"
+  proto_file: "deploy.pt"
+  anchors_file: "anchors.txt"
+  types_file: "types.txt"
+  calibratetable_root: "./3d-r4-half_caffe"
+  confidence_threshold: 0.4
+  offset_ratio: 0.288889  # LoadInputShape: offset_y = int(offset_ratio * image_height + 0.5), stored per camera
+  cropped_ratio: 0.711111  # LoadInputShape: roi_ratio = cropped_ratio * image_height / image_width
+  resized_width: 1440  # inference input width before rounding to a multiple of aligned_pixel
+  aligned_pixel: 32  # inference input width and height are rounded to multiples of this value
+  min_2d_height: 10
+  min_3d_height: 0.1
+  ori_cycle: 2
+  with_box3d: true
+  light_swt_conf_threshold: 0
+  light_vis_conf_threshold: 0
+  with_lights: true
+  with_ratios: false
+  # num_areas: 4  # left unset; when num_areas() == 0, Process() derives area_id from visible_ratios
+  border_ratio: 0.01
+}
+net_param {
+  det1_loc_blob: "loc_pred"
+  det1_obj_blob: "obj_pred"
+  det1_cls_blob: "cls_pred"
+  det1_ori_blob: "ori_pred"
+  det1_dim_blob: "dim_pred"
+  input_blob: "data"  # blob that Process() fetches with inference_->get_blob() and fills via ResizeGPU
+  feat_blob: "conv3_3"
+}
+nms_param {
+  type: "NormalNMS"
+  threshold: 0.5
+  sigma: 0.4
+  inter_cls_nms_thresh: 0.6
+}