Skip to content

Commit 588789d

Browse files
committed
add tensor2box, which is translated from a custom python function
1 parent 7440dae commit 588789d

File tree

3 files changed

+188
-1
lines changed

3 files changed

+188
-1
lines changed
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
#ifndef CAFFE_TENSOR2BOX_LAYER_HPP_
#define CAFFE_TENSOR2BOX_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

namespace caffe {

/**
 * @brief Decodes a raw detection tensor into bounding-box predictions;
 *        translated from a custom python tensor2box function.
 *
 * Python reference example:
 *   anchor == [(10, 10), (10, 20), (50, 50)]
 *   num_classes == 4
 *   img_dim == (320, 480)
 *   input (1, 27, 20, 30) -> output (1, 1800, 9)
 */
template <typename Dtype>
class Tensor2BoxLayer : public Layer<Dtype> {
 public:
  explicit Tensor2BoxLayer(const LayerParameter& param) :
      Layer<Dtype>(param) {
  }
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const {return "Tensor2Box";}
  // BUGFIX: the base-class hook is ExactNumBottomBlobs(); the original
  // ExactBottomBlobs() overrode nothing, so the single-bottom-blob
  // constraint was never enforced by Layer::CheckBlobCounts().
  virtual inline int ExactNumBottomBlobs() const {return 1;}
  virtual inline int ExactNumTopBlobs() const {return 1;}

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  /// Inference-only layer: no gradient is defined.
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down,
      const vector<Blob<Dtype>*>& bottom) {
    NOT_IMPLEMENTED;
  }

  int num_classes_;                    // number of object classes (e.g. 4)
  int img_dim_h_, img_dim_w_;          // input image size (e.g. 320 x 480)
  vector<int> anchors_x_, anchors_y_;  // anchor sizes in image pixels
};

}  // namespace caffe

#endif  // CAFFE_TENSOR2BOX_LAYER_HPP_
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
#include <algorithm>
2+
#include <functional>
3+
#include <utility>
4+
#include <vector>
5+
6+
#include "caffe/layers/tensor2box_layer.hpp"
7+
8+
namespace caffe {
9+
10+
template <typename Dtype>
11+
void Tensor2BoxLayer<Dtype>::LayerSetUp(
12+
const vector<Blob<Dtype> *> &bottom, const vector<Blob<Dtype> *> &top) {
13+
const Tensor2BoxParameter &tensor2box_param =
14+
this->layer_param_.tensor2box_param();
15+
num_classes_ = tensor2box_param.num_classes();
16+
img_dim_h_ = tensor2box_param.img_dim_h();
17+
img_dim_w_ = tensor2box_param.img_dim_w();
18+
anchors_x_.clear();
19+
std::copy(tensor2box_param.anchors_x().begin(),
20+
tensor2box_param.anchors_x().end(),
21+
std::back_inserter(anchors_x_));
22+
anchors_y_.clear();
23+
std::copy(tensor2box_param.anchors_y().begin(),
24+
tensor2box_param.anchors_y().end(),
25+
std::back_inserter(anchors_y_));
26+
CHECK_EQ(anchors_x_.size(), anchors_y_.size())
27+
<< "anchor_x and anchor_y should have the same length.";
28+
}
29+
30+
template <typename Dtype>
31+
void Tensor2BoxLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
32+
const vector<Blob<Dtype>*>& top) {
33+
// python example has input = (1, 27, 20, 30)
34+
// and output = (1, 1800, 9)
35+
const int count = bottom[0]->count();
36+
// output shape = (num_samples, -1, 4+1+num_classes)
37+
// (1,-1,9) = (1, 1800, 9)
38+
// while input shape = (1,27,20,30)
39+
vector<int> new_shape(3, 0); // (0, 0, 0)
40+
new_shape[0] = bottom[0]->shape(0);
41+
new_shape[2] = 4 + 1 + num_classes_;
42+
new_shape[1] = count / new_shape[0] / new_shape[2];
43+
top[0]->Reshape(new_shape);
44+
CHECK_EQ(count, top[0]->count());
45+
}
46+
47+
//void comput_grid_offsets(int grid_size_h, int grid_size_w, int img_dim_h, int )
48+
/*template <typename Dtype>
49+
inline Dtype exp(Dtype x) {
50+
// std::exp(-x) for -x less than -87 will cause underflow 32bit float range
51+
//if (x < -86) return Dtype(0.0);
52+
return std::exp(x);
53+
}*/
54+
55+
// Logistic function 1 / (1 + exp(-z)), expressed through tanh.
// The tanh form avoids the 32-bit float underflow that exp(-z)
// would hit for z below about -87.
template <typename Dtype>
inline Dtype sigmoid(Dtype z) {
  return std::tanh(0.5 * z) * 0.5 + 0.5;
}
61+
62+
template <typename Dtype>
63+
void Tensor2BoxLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
64+
const vector<Blob<Dtype>*>& top) {
65+
// this layer is translating tensor2box python function from
66+
// cnn_models/pytorch/Nikon/4thBenchmark/ScenarioForAI-CVSubsystem/subModules/detection.py#L97-176
67+
const Dtype *bottom_data = bottom[0]->cpu_data();
68+
Dtype *top_data = top[0]->mutable_cpu_data();
69+
// setup variables
70+
const int num_anchors = anchors_x_.size();
71+
const int num_samples = bottom[0]->shape(0);
72+
const int H = bottom[0]->shape(2), W = bottom[0]->shape(3);
73+
const pair<int, int> grid_size(H, W);
74+
// 1st part:
75+
// data.reshape(num_samples, num_anchors, num_classes + 5, grid_size[0], grid_size[1]).transpose(0, 1, 3, 4, 2)
76+
// (1, 27, 20, 30) -> (1, 3, 20, 30, 9)
77+
// we can do the transpose operation during moving bottom_data to top_data
78+
const int _div = bottom[0]->shape(1) / num_anchors;
79+
int old_idx = 0;
80+
for (int s = 0; s < num_samples; ++s) {
81+
for (int a = 0; a < num_anchors; ++a) {
82+
for (int d = 0; d < _div; ++d) {
83+
for (int h = 0; h < H; ++h) {
84+
for (int w = 0; w < W; ++w) {
85+
// move old_idx==(s,a*num_anchors+d,h,w) to new_idx==(s,a,h,w,d)
86+
int new_idx = (((s*num_anchors+a)*H+h)*W+w)*_div+d;
87+
top_data[new_idx] = bottom_data[old_idx++];
88+
}
89+
}
90+
}
91+
}
92+
}
93+
// 2nd part: compute_grid_offsets
94+
const int stride_w = img_dim_w_ / W; // stride_w = img_dim[1] / grid_size[1]
95+
const int stride_h = img_dim_h_ / H; // stride_h = img_dim[0] / grid_size[0]
96+
vector<Dtype>anchor_h(num_anchors, 0.0); // to store scaled anchor_h and anchor_w
97+
vector<Dtype>anchor_w(num_anchors, 0.0); // to store scaled anchor_h and anchor_w
98+
for (int i = 0; i < num_anchors; ++i) {
99+
anchor_w[i] = Dtype(anchors_x_[i]) / stride_w;
100+
anchor_h[i] = Dtype(anchors_y_[i]) / stride_h;
101+
}
102+
// 3rd part: pred_boxes
103+
for (int s = 0; s < num_samples; ++s) {
104+
for (int a = 0; a < num_anchors; ++a) {
105+
for (int h = 0; h < H; ++h) {
106+
for (int w = 0; w < W; ++w) {
107+
// 0,1,4,5~ sigmoid
108+
// 0,1 +grid_xy
109+
// 2,3 exp() * anchor
110+
// 0~4 *stride
111+
top_data[0] = (sigmoid(top_data[0]) + w) * stride_w;
112+
top_data[1] = (sigmoid(top_data[1]) + h) * stride_h;
113+
top_data[2] = (std::exp(top_data[2]) * anchor_w[a]) * stride_w;
114+
top_data[3] = (std::exp(top_data[3]) * anchor_h[a]) * stride_h;
115+
// pred_conf = sigmoid(prediction[..., 4]) # Conf
116+
// pred_cls = sigmoid(prediction[..., 5:]) # Cls pred.
117+
for (int i = 4; i < _div; ++i) {
118+
top_data[i] = sigmoid(top_data[i]);
119+
}
120+
top_data += _div;
121+
}
122+
}
123+
}
124+
}
125+
;//end of forward_cpu
126+
}
127+
128+
// Instantiate the float and double versions of the layer and register it
// with the layer factory under the "Tensor2Box" type name.
INSTANTIATE_CLASS(Tensor2BoxLayer);
REGISTER_LAYER_CLASS(Tensor2Box);
130+
131+
} // namespace caffe

src/caffe/proto/caffe.proto

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -460,7 +460,7 @@ message ParamSpec {
460460
// NOTE
461461
// Update the next available ID when you add a new LayerParameter field.
462462
//
463-
// LayerParameter next available layer-specific ID: 276 (last added: ScatterND)
463+
// LayerParameter next available layer-specific ID: 281 (last added: Tensor2Box=280)
464464
message LayerParameter {
465465
optional string name = 1; // the layer name
466466
optional string type = 2; // the layer type
@@ -682,6 +682,7 @@ message LayerParameter {
682682
optional NonMaxSuppressionParameter non_max_suppression_param = 271;
683683
optional ScatterNDParameter scatter_nd_param = 276;
684684
optional LayerNormParameter layer_norm_param = 277;
685+
optional Tensor2BoxParameter tensor2box_param = 280;
685686
}
686687

687688
message AccumParameter {
@@ -3676,3 +3677,11 @@ message SpatialBatchingPoolingParameter {
36763677
CUDNN = 2;
36773678
}
36783679
}
3680+
3681+
// Parameters for the Tensor2Box layer, which decodes a raw detection
// tensor into bounding-box predictions (see Tensor2BoxLayer).
message Tensor2BoxParameter {
  required int32 num_classes = 1;  // number of object classes
  required int32 img_dim_h = 2;    // network input image height in pixels
  required int32 img_dim_w = 3;    // network input image width in pixels
  repeated int32 anchors_x = 4;    // anchor widths; paired with anchors_y
  repeated int32 anchors_y = 5;    // anchor heights; same length as anchors_x
}

0 commit comments

Comments
 (0)