#include <algorithm>
#include <cmath>
#include <functional>
#include <iterator>
#include <utility>
#include <vector>

#include "caffe/layers/tensor2box_layer.hpp"

namespace caffe {

template <typename Dtype>
void Tensor2BoxLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
                                        const vector<Blob<Dtype>*>& top) {
  const Tensor2BoxParameter& tensor2box_param =
      this->layer_param_.tensor2box_param();
  num_classes_ = tensor2box_param.num_classes();
  img_dim_h_ = tensor2box_param.img_dim_h();
  img_dim_w_ = tensor2box_param.img_dim_w();
  anchors_x_.clear();
  std::copy(tensor2box_param.anchors_x().begin(),
            tensor2box_param.anchors_x().end(),
            std::back_inserter(anchors_x_));
  anchors_y_.clear();
  std::copy(tensor2box_param.anchors_y().begin(),
            tensor2box_param.anchors_y().end(),
            std::back_inserter(anchors_y_));
  CHECK_EQ(anchors_x_.size(), anchors_y_.size())
      << "anchors_x and anchors_y must have the same length.";
}

template <typename Dtype>
void Tensor2BoxLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
                                     const vector<Blob<Dtype>*>& top) {
  // The python reference example has input = (1, 27, 20, 30)
  // and output = (1, 1800, 9).
  const int count = bottom[0]->count();
  // Output shape = (num_samples, -1, 4 + 1 + num_classes);
  // for the example above, (1, -1, 9) resolves to (1, 1800, 9)
  // given the input shape (1, 27, 20, 30).
  vector<int> new_shape(3, 0);
  new_shape[0] = bottom[0]->shape(0);
  new_shape[2] = 4 + 1 + num_classes_;
  new_shape[1] = count / new_shape[0] / new_shape[2];
  top[0]->Reshape(new_shape);
  CHECK_EQ(count, top[0]->count());
}
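
// Worked example for the shapes quoted above (a sketch assuming num_classes = 4
// and 3 anchors): count = 1 * 27 * 20 * 30 = 16200 and new_shape[2] = 9, so
// new_shape[1] = 16200 / 1 / 9 = 1800, giving a top blob of shape (1, 1800, 9).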

// Numerically stable logistic sigmoid. The naive 1 / (1 + std::exp(-x)) form
// evaluates exp() of a large-magnitude argument, which over/underflows 32-bit
// float once |x| exceeds roughly 87, so the equivalent tanh form is used.
template <typename Dtype>
inline Dtype sigmoid(Dtype x) {
  return 0.5 * std::tanh(0.5 * x) + 0.5;
}
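// Derivation note: 1 / (1 + exp(-x)) = exp(x/2) / (exp(x/2) + exp(-x/2))
// = 0.5 * (1 + tanh(0.5 * x)), so the tanh form above is exactly the logistic
// sigmoid, while std::tanh merely saturates at +/-1 for large |x|.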

template <typename Dtype>
void Tensor2BoxLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
                                         const vector<Blob<Dtype>*>& top) {
  // This layer translates the tensor2box python function from
  // cnn_models/pytorch/Nikon/4thBenchmark/ScenarioForAI-CVSubsystem/subModules/detection.py#L97-176
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  // Set up variables.
  const int num_anchors = anchors_x_.size();
  const int num_samples = bottom[0]->shape(0);
  const int H = bottom[0]->shape(2), W = bottom[0]->shape(3);  // grid_size
  // 1st part:
  // data.reshape(num_samples, num_anchors, num_classes + 5, grid_size[0], grid_size[1]).transpose(0, 1, 3, 4, 2)
  // e.g. (1, 27, 20, 30) -> (1, 3, 20, 30, 9).
  // The transpose is folded into the copy from bottom_data to top_data.
  const int _div = bottom[0]->shape(1) / num_anchors;  // 4 + 1 + num_classes attributes per anchor
  int old_idx = 0;
  for (int s = 0; s < num_samples; ++s) {
    for (int a = 0; a < num_anchors; ++a) {
      for (int d = 0; d < _div; ++d) {
        for (int h = 0; h < H; ++h) {
          for (int w = 0; w < W; ++w) {
            // Move old_idx == (s, a*_div + d, h, w) to new_idx == (s, a, h, w, d).
            const int new_idx = (((s * num_anchors + a) * H + h) * W + w) * _div + d;
            top_data[new_idx] = bottom_data[old_idx++];
          }
        }
      }
    }
  }
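  // Index sanity check for the (1, 27, 20, 30) example above (3 anchors,
  // _div = 9): bottom element (s=0, c=10, h=0, w=0), i.e. a=1, d=1, lands at
  // new_idx = (((0*3 + 1)*20 + 0)*30 + 0)*9 + 1 = 5401, which is position
  // (0, 1, 0, 0, 1) in the transposed (num_samples, num_anchors, H, W, _div) layout.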
  // 2nd part: compute_grid_offsets.
  // Assumes img_dim is an integer multiple of the grid size, since the
  // divisions below are done in integer arithmetic.
  const int stride_w = img_dim_w_ / W;  // stride_w = img_dim[1] / grid_size[1]
  const int stride_h = img_dim_h_ / H;  // stride_h = img_dim[0] / grid_size[0]
  // Anchors scaled from pixels to grid-cell units.
  vector<Dtype> anchor_h(num_anchors, 0.0);
  vector<Dtype> anchor_w(num_anchors, 0.0);
  for (int i = 0; i < num_anchors; ++i) {
    anchor_w[i] = Dtype(anchors_x_[i]) / stride_w;
    anchor_h[i] = Dtype(anchors_y_[i]) / stride_h;
  }
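  // Illustrative only (values not from the source): with img_dim_w_ = 960 and
  // W = 30, stride_w = 32, so an anchor 116 px wide becomes 116 / 32 = 3.625
  // grid cells.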
  // 3rd part: pred_boxes. Decode each (x, y, w, h, conf, cls...) tuple in place:
  //   channels 0,1: sigmoid, add the grid offsets (w, h), scale by stride;
  //   channels 2,3: exp, multiply by the scaled anchor, scale by stride;
  //   channels 4+ : sigmoid (objectness confidence and class scores).
  for (int s = 0; s < num_samples; ++s) {
    for (int a = 0; a < num_anchors; ++a) {
      for (int h = 0; h < H; ++h) {
        for (int w = 0; w < W; ++w) {
          top_data[0] = (sigmoid(top_data[0]) + w) * stride_w;
          top_data[1] = (sigmoid(top_data[1]) + h) * stride_h;
          top_data[2] = (std::exp(top_data[2]) * anchor_w[a]) * stride_w;
          top_data[3] = (std::exp(top_data[3]) * anchor_h[a]) * stride_h;
          // pred_conf = sigmoid(prediction[..., 4])   # Conf
          // pred_cls  = sigmoid(prediction[..., 5:])  # Cls pred.
          for (int i = 4; i < _div; ++i) {
            top_data[i] = sigmoid(top_data[i]);
          }
          top_data += _div;
        }
      }
    }
  }
}

INSTANTIATE_CLASS(Tensor2BoxLayer);
REGISTER_LAYER_CLASS(Tensor2Box);
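
// A minimal usage sketch (not from the source): how this layer might appear in
// a prototxt, assuming the Tensor2BoxParameter fields read in LayerSetUp; the
// blob names and all numeric values below are illustrative only.
//
//   layer {
//     name: "tensor2box"
//     type: "Tensor2Box"
//     bottom: "yolo_head"
//     top: "detections"
//     tensor2box_param {
//       num_classes: 4
//       img_dim_h: 640
//       img_dim_w: 960
//       anchors_x: 116  anchors_x: 156  anchors_x: 373
//       anchors_y: 90   anchors_y: 198  anchors_y: 326
//     }
//   }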

}  // namespace caffe