Skip to content

Commit fd7f96d

Browse files
committed
separate the ssd decoder for host_fixed usage
1 parent 3936762 commit fd7f96d

File tree

2 files changed

+306
-0
lines changed

2 files changed

+306
-0
lines changed
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
#ifndef CAFFE_SSD_DECODER_LAYER_HPP_
2+
#define CAFFE_SSD_DECODER_LAYER_HPP_
3+
4+
#include <boost/property_tree/json_parser.hpp>
5+
#include <boost/property_tree/ptree.hpp>
6+
#include <boost/regex.hpp>
7+
8+
#include <map>
9+
#include <string>
10+
#include <utility>
11+
#include <vector>
12+
13+
#include "caffe/blob.hpp"
14+
#include "caffe/data_transformer.hpp"
15+
#include "caffe/layer.hpp"
16+
#include "caffe/proto/caffe.pb.h"
17+
#include "caffe/util/bbox_util.hpp"
18+
19+
using namespace boost::property_tree; // NOLINT(build/namespaces)
20+
21+
namespace caffe {
22+
23+
/**
24+
* @brief Separate the bbox decoding part from the detection_output layer
25+
* and simplify the implementations for host_fixed usage.
26+
*
27+
* NOTE: does not implement Backwards operation.
28+
*/
29+
template <typename Dtype>
30+
class SSDDecoderLayer : public Layer<Dtype> {
31+
public:
32+
explicit SSDDecoderLayer(const LayerParameter& param)
33+
: Layer<Dtype>(param) {}
34+
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
35+
const vector<Blob<Dtype>*>& top);
36+
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
37+
const vector<Blob<Dtype>*>& top);
38+
39+
virtual inline const char* type() const { return "SSDDecoder"; }
40+
virtual inline int ExactNumBottomBlobs() const { return 2; } //bottom0 is loc, bottom1 is priorbox
41+
virtual inline int ExactNumTopBlobs() const { return 1; }
42+
43+
protected:
44+
45+
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
46+
const vector<Blob<Dtype>*>& top);
47+
/// @brief Not implemented
48+
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
49+
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
50+
NOT_IMPLEMENTED;
51+
}
52+
53+
float objectness_score_;
54+
int num_classes_;
55+
bool share_location_;
56+
int num_loc_classes_;
57+
int background_label_id_;
58+
CodeType code_type_;
59+
bool variance_encoded_in_target_;
60+
int keep_top_k_;
61+
float confidence_threshold_;
62+
63+
int num_;
64+
int num_priors_;
65+
66+
float nms_threshold_;
67+
int top_k_;
68+
float eta_;
69+
70+
bool need_save_;
71+
string output_directory_;
72+
string output_name_prefix_;
73+
string output_format_;
74+
map<int, string> label_to_name_;
75+
map<int, string> label_to_display_name_;
76+
vector<string> names_;
77+
vector<pair<int, int> > sizes_;
78+
int num_test_image_;
79+
int name_count_;
80+
bool has_resize_;
81+
ResizeParameter resize_param_;
82+
83+
ptree detections_;
84+
85+
bool visualize_;
86+
float visualize_threshold_;
87+
shared_ptr<DataTransformer<Dtype> > data_transformer_;
88+
string save_file_;
89+
Blob<Dtype> bbox_preds_;
90+
Blob<Dtype> bbox_permute_;
91+
Blob<Dtype> conf_permute_;
92+
};
93+
94+
} // namespace caffe
95+
96+
#endif // CAFFE_SSD_DECODER_LAYER_HPP_
Lines changed: 210 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,210 @@
1+
#include <algorithm>
2+
#include <fstream> // NOLINT(readability/streams)
3+
#include <map>
4+
#include <string>
5+
#include <utility>
6+
#include <vector>
7+
8+
#include "boost/filesystem.hpp"
9+
#include "boost/foreach.hpp"
10+
11+
#include "caffe/layers/ssd_decoder_layer.hpp"
12+
13+
namespace caffe {
14+
15+
template <typename Dtype>
16+
void SSDDecoderLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
17+
const vector<Blob<Dtype>*>& top) {
18+
const DetectionOutputParameter& detection_output_param =
19+
this->layer_param_.detection_output_param();
20+
CHECK(detection_output_param.has_num_classes()) << "Must specify num_classes";
21+
objectness_score_ = detection_output_param.objectness_score();
22+
num_classes_ = detection_output_param.num_classes();
23+
share_location_ = detection_output_param.share_location();
24+
num_loc_classes_ = share_location_ ? 1 : num_classes_;
25+
background_label_id_ = detection_output_param.background_label_id();
26+
code_type_ = detection_output_param.code_type();
27+
variance_encoded_in_target_ =
28+
detection_output_param.variance_encoded_in_target();
29+
keep_top_k_ = detection_output_param.keep_top_k();
30+
confidence_threshold_ = detection_output_param.has_confidence_threshold() ?
31+
detection_output_param.confidence_threshold() : -FLT_MAX;
32+
// Parameters used in nms.
33+
nms_threshold_ = detection_output_param.nms_param().nms_threshold();
34+
CHECK_GE(nms_threshold_, 0.) << "nms_threshold must be non negative.";
35+
eta_ = detection_output_param.nms_param().eta();
36+
CHECK_GT(eta_, 0.);
37+
CHECK_LE(eta_, 1.);
38+
top_k_ = -1;
39+
if (detection_output_param.nms_param().has_top_k()) {
40+
top_k_ = detection_output_param.nms_param().top_k();
41+
}
42+
const SaveOutputParameter& save_output_param =
43+
detection_output_param.save_output_param();
44+
output_directory_ = save_output_param.output_directory();
45+
if (!output_directory_.empty()) {
46+
if (boost::filesystem::is_directory(output_directory_)) {
47+
// boost::filesystem::remove_all(output_directory_);
48+
}
49+
if (!boost::filesystem::create_directories(output_directory_)) {
50+
LOG(WARNING) << "Failed to create directory: " << output_directory_;
51+
}
52+
}
53+
output_name_prefix_ = save_output_param.output_name_prefix();
54+
need_save_ = output_directory_ == "" ? false : true;
55+
output_format_ = save_output_param.output_format();
56+
if (save_output_param.has_label_map_file()) {
57+
string label_map_file = save_output_param.label_map_file();
58+
if (label_map_file.empty()) {
59+
// Ignore saving if there is no label_map_file provided.
60+
LOG(WARNING) << "Provide label_map_file if output results to files.";
61+
need_save_ = false;
62+
} else {
63+
LabelMap label_map;
64+
CHECK(ReadProtoFromTextFile(label_map_file, &label_map))
65+
<< "Failed to read label map file: " << label_map_file;
66+
CHECK(MapLabelToName(label_map, true, &label_to_name_))
67+
<< "Failed to convert label to name.";
68+
CHECK(MapLabelToDisplayName(label_map, true, &label_to_display_name_))
69+
<< "Failed to convert label to display name.";
70+
}
71+
} else {
72+
need_save_ = false;
73+
}
74+
if (save_output_param.has_name_size_file()) {
75+
string name_size_file = save_output_param.name_size_file();
76+
if (name_size_file.empty()) {
77+
// Ignore saving if there is no name_size_file provided.
78+
LOG(WARNING) << "Provide name_size_file if output results to files.";
79+
need_save_ = false;
80+
} else {
81+
std::ifstream infile(name_size_file.c_str());
82+
CHECK(infile.good())
83+
<< "Failed to open name size file: " << name_size_file;
84+
// The file is in the following format:
85+
// name height width
86+
// ...
87+
string name;
88+
int height, width;
89+
while (infile >> name >> height >> width) {
90+
names_.push_back(name);
91+
sizes_.push_back(std::make_pair(height, width));
92+
}
93+
infile.close();
94+
if (save_output_param.has_num_test_image()) {
95+
num_test_image_ = save_output_param.num_test_image();
96+
} else {
97+
num_test_image_ = names_.size();
98+
}
99+
CHECK_LE(num_test_image_, names_.size());
100+
}
101+
} else {
102+
need_save_ = false;
103+
}
104+
has_resize_ = save_output_param.has_resize_param();
105+
if (has_resize_) {
106+
resize_param_ = save_output_param.resize_param();
107+
}
108+
name_count_ = 0;
109+
visualize_ = detection_output_param.visualize();
110+
if (visualize_) {
111+
visualize_threshold_ = 0.6;
112+
if (detection_output_param.has_visualize_threshold()) {
113+
visualize_threshold_ = detection_output_param.visualize_threshold();
114+
}
115+
data_transformer_.reset(
116+
new DataTransformer<Dtype>(this->layer_param_.transform_param(),
117+
this->phase_));
118+
data_transformer_->InitRand();
119+
save_file_ = detection_output_param.save_file();
120+
}
121+
bbox_preds_.ReshapeLike(*(bottom[0]));
122+
if (!share_location_) {
123+
bbox_permute_.ReshapeLike(*(bottom[0]));
124+
}
125+
conf_permute_.ReshapeLike(*(bottom[1]));
126+
}
127+
128+
template <typename Dtype>
129+
void SSDDecoderLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
130+
const vector<Blob<Dtype>*>& top) {
131+
132+
if (bbox_preds_.num() != bottom[0]->num() ||
133+
bbox_preds_.count(1) != bottom[0]->count(1)) {
134+
bbox_preds_.ReshapeLike(*(bottom[0]));
135+
}
136+
if (!share_location_ && (bbox_permute_.num() != bottom[0]->num() ||
137+
bbox_permute_.count(1) != bottom[0]->count(1))) {
138+
bbox_permute_.ReshapeLike(*(bottom[0]));
139+
}
140+
141+
num_priors_ = bottom[1]->height() / 4;
142+
CHECK_EQ(num_priors_ * num_loc_classes_ * 4, bottom[0]->channels())
143+
<< "Number of priors must match number of location predictions.";
144+
// num() and channels() are 1.
145+
vector<int> top_shape(2, 1);
146+
top_shape.push_back(num_priors_);
147+
// Each row is a 4 dimension vector, which stores
148+
// [xmin, ymin, xmax, ymax]
149+
top_shape.push_back(4);
150+
top[0]->Reshape(top_shape);
151+
}
152+
153+
template <typename Dtype>
154+
void SSDDecoderLayer<Dtype>::Forward_cpu(
155+
const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
156+
const Dtype* loc_data = bottom[0]->cpu_data();
157+
const Dtype* prior_data = bottom[1]->cpu_data();
158+
const int num = bottom[0]->num();
159+
160+
// Retrieve all location predictions.
161+
vector<LabelBBox> all_loc_preds;
162+
GetLocPredictions(loc_data, num, num_priors_, num_loc_classes_,
163+
share_location_, &all_loc_preds);
164+
165+
// Retrieve all prior bboxes. It is same within a batch since we assume all
166+
// images in a batch are of same dimension.
167+
vector<NormalizedBBox> prior_bboxes;
168+
vector<vector<float> > prior_variances;
169+
GetPriorBBoxes(prior_data, num_priors_, &prior_bboxes, &prior_variances);
170+
171+
// Decode all loc predictions to bboxes.
172+
vector<LabelBBox> all_decode_bboxes;
173+
const bool clip_bbox = false;
174+
175+
DecodeBBoxesAll(all_loc_preds, prior_bboxes, prior_variances, num,
176+
share_location_, num_loc_classes_, background_label_id_,
177+
code_type_, variance_encoded_in_target_, clip_bbox,
178+
&all_decode_bboxes);
179+
180+
Dtype* top_data = top[0]->mutable_cpu_data();
181+
182+
int count = 0;
183+
//boost::filesystem::path output_directory(output_directory_);
184+
for (int i = 0; i < num; ++i) {
185+
const LabelBBox& decode_bboxes = all_decode_bboxes[i];
186+
187+
int loc_label = -1; //share_location_ ? -1 : label;
188+
if (decode_bboxes.find(loc_label) == decode_bboxes.end()) {
189+
// Something bad happened if there are no predictions for current label.
190+
LOG(FATAL) << "Could not find location predictions for " << loc_label;
191+
continue;
192+
}
193+
const vector<NormalizedBBox>& bboxes =
194+
decode_bboxes.find(loc_label)->second;
195+
196+
for (int j = 0; j < num_priors_; ++j) {
197+
const NormalizedBBox& bbox = bboxes[j];
198+
top_data[count * 4] = bbox.xmin();
199+
top_data[count * 4 + 1] = bbox.ymin();
200+
top_data[count * 4 + 2] = bbox.xmax();
201+
top_data[count * 4 + 3] = bbox.ymax();
202+
++count;
203+
}
204+
}
205+
}
206+
207+
// Caffe macro defined outside this file; presumably emits the explicit
// template instantiations of SSDDecoderLayer - TODO confirm.
INSTANTIATE_CLASS(SSDDecoderLayer);
208+
// Caffe macro defined outside this file; presumably registers the layer under
// the type name "SSDDecoder" (matching type() in the header) - TODO confirm.
REGISTER_LAYER_CLASS(SSDDecoder);
209+
210+
} // namespace caffe

0 commit comments

Comments
 (0)