Skip to content

Commit 75d1f91

Browse files
SowmyaDhanapalvera121
authored and committed
Caffe YoloV3 training patch (#7)
* Fix type issue for SSD inference detection file (.json) dump
* Add support for Yolov2 training (COCO Dataset)
* Add multi_label feature in data layer to support YoloV3
* Remove unused function
* Add YoloV3 loss layer
* Fix segmentation fault error while doing make runtest
1 parent e306be1 commit 75d1f91

File tree

13 files changed

+944
-88
lines changed

13 files changed

+944
-88
lines changed

data/coco/labelmap_yolo_coco.prototxt

Lines changed: 400 additions & 0 deletions
Large diffs are not rendered by default.

include/caffe/layers/base_data_layer.hpp

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -41,12 +41,15 @@ class BaseDataLayer : public Layer<Dtype> {
4141
TransformationParameter transform_param_;
4242
shared_ptr<DataTransformer<Dtype> > data_transformer_;
4343
bool output_labels_;
44+
bool box_label_ = false;
45+
int top_size_;
4446
};
4547

4648
template <typename Dtype>
4749
class Batch {
4850
public:
4951
Blob<Dtype> data_, label_, dim_;
52+
vector<shared_ptr<Blob<Dtype> > > multi_label_;
5053
};
5154

5255
template <typename Dtype>
@@ -64,7 +67,6 @@ class BasePrefetchingDataLayer :
6467
const vector<Blob<Dtype>*>& top);
6568
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
6669
const vector<Blob<Dtype>*>& top);
67-
6870
protected:
6971
virtual void InternalThreadEntry();
7072
virtual void load_batch(Batch<Dtype>* batch) = 0;

include/caffe/layers/yolo_v2_loss_layer.hpp

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -15,8 +15,6 @@ namespace caffe {
1515
Dtype Overlap(Dtype x1, Dtype w1, Dtype x2, Dtype w2);
1616
template <typename Dtype>
1717
Dtype Calc_iou(const std::vector<Dtype>& box, const std::vector<Dtype>& truth);
18-
template <typename Dtype>
19-
Dtype Calc_rmse(const std::vector<Dtype>& box, const std::vector<Dtype>& truth);
2018

2119
template<typename Dtype>
2220
class YoloV2LossLayer: public LossLayer<Dtype> {
Lines changed: 55 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,55 @@
1+
#ifndef YOLOV3LOSSLAYER_H
2+
#define YOLOV3LOSSLAYER_H
3+
4+
#include <vector>
5+
#include <google/protobuf/repeated_field.h>
6+
7+
#include "caffe/blob.hpp"
8+
#include "caffe/layer.hpp"
9+
#include "caffe/proto/caffe.pb.h"
10+
11+
#include "caffe/layers/loss_layer.hpp"
12+
13+
namespace caffe {
14+
template <typename Dtype>
15+
Dtype Overlap(Dtype x1, Dtype w1, Dtype x2, Dtype w2);
16+
template <typename Dtype>
17+
Dtype Calc_iou(const std::vector<Dtype>& box, const std::vector<Dtype>& truth);
18+
19+
template<typename Dtype>
20+
class YoloV3LossLayer: public LossLayer<Dtype> {
21+
public:
22+
explicit YoloV3LossLayer(const LayerParameter& param)
23+
: LossLayer<Dtype>(param), diff_() {}
24+
25+
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
26+
const vector<Blob<Dtype>*>& top);
27+
28+
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
29+
const vector<Blob<Dtype>*>& top);
30+
31+
virtual inline const char* type() const { return "YoloV3Loss"; }
32+
33+
protected:
34+
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
35+
const vector<Blob<Dtype>*>& top);
36+
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
37+
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
38+
39+
vector<int> biases_;
40+
vector<int> masks_;
41+
int side_;
42+
int num_classes_;
43+
int num_boxes_;
44+
int total_num_boxes_;
45+
float ignore_thresh_;
46+
float truth_thresh_;
47+
int seen;
48+
int net_w_;
49+
int net_h_;
50+
51+
Blob<Dtype> diff_;
52+
};
53+
}
54+
55+
#endif // YOLOV3LOSSLAYER_H

include/caffe/util/io.hpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -230,7 +230,7 @@ bool ReadXMLToAnnotatedDatum(const string& labelname, const int img_height,
230230

231231
bool ReadJSONToAnnotatedDatum(const string& labelname, const int img_height,
232232
const int img_width, const std::map<string, int>& name_to_label,
233-
AnnotatedDatum* anno_datum);
233+
AnnotatedDatum* anno_datum, const bool caffe_yolo = false);
234234

235235
bool ReadTxtToAnnotatedDatum(const string& labelname, const int height,
236236
const int width, AnnotatedDatum* anno_datum);

src/caffe/layers/annotated_data_layer.cpp

Lines changed: 55 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -66,7 +66,6 @@ void AnnotatedDataLayer<Dtype>::DataLayerSetUp(
6666
for (int i = 0; i < this->prefetch_.size(); ++i) {
6767
this->prefetch_[i]->data_.Reshape(top_shape);
6868
}
69-
//LOG(INFO) << "output data size: " << top[0]->num() << ","
7069
LOG_IF(INFO, Caffe::root_solver())
7170
<< "output data size: " << top[0]->num() << ","
7271
<< top[0]->channels() << "," << top[0]->height() << ","
@@ -75,15 +74,13 @@ void AnnotatedDataLayer<Dtype>::DataLayerSetUp(
7574
if (this->output_labels_) {
7675
has_anno_type_ = anno_datum.has_type() || anno_data_param.has_anno_type();
7776
if(transform_param.has_caffe_yolo()) {
77+
this->box_label_ = true;
7878
vector<int> label_shape(1, batch_size);
7979
if (param.side_size() > 0) {
8080
for (int i = 0; i < param.side_size(); ++i) {
8181
sides_.push_back(param.side(i));
8282
}
8383
}
84-
if (sides_.size() == 0) {
85-
sides_.push_back(7);
86-
}
8784
CHECK_EQ(sides_.size(), top.size() - 1) << "side num not equal to top size";
8885
if (has_anno_type_) {
8986
anno_type_ = anno_datum.type();
@@ -93,23 +90,28 @@ void AnnotatedDataLayer<Dtype>::DataLayerSetUp(
9390
LOG(WARNING) << "type stored in AnnotatedDatum is shadowed.";
9491
anno_type_ = anno_data_param.anno_type();
9592
}
96-
if (anno_type_ == AnnotatedDatum_AnnotationType_BBOX) {
93+
for (int i = 0; i < this->prefetch_.size(); ++i) {
94+
this->prefetch_[i]->multi_label_.clear();
95+
}
96+
if (anno_type_ == AnnotatedDatum_AnnotationType_BBOX) {
9797
// Yolo label format
9898
for (int i = 0; i < sides_.size(); ++i) {
9999
vector<int> label_shape(1, batch_size);
100100
int label_size = sides_[i] * sides_[i] * (1 + 1 + 1 + 4);
101101
label_shape.push_back(label_size);
102102
top[i+1]->Reshape(label_shape);
103+
for (int j = 0; j < this->prefetch_.size(); ++j) {
104+
shared_ptr<Blob<Dtype> > tmp_blob;
105+
tmp_blob.reset(new Blob<Dtype>(label_shape));
106+
this->prefetch_[j]->multi_label_.push_back(tmp_blob);
107+
}
103108
}
104109
} else {
105110
LOG(FATAL) << "Unknown annotation type.";
106111
}
107112
} else {
108113
label_shape[0] = batch_size;
109114
}
110-
for (int i = 0; i < this->prefetch_.size(); ++i) {
111-
this->prefetch_[i]->label_.Reshape(label_shape);
112-
}
113115
}
114116
else {
115117
vector<int> label_shape(4, 1);
@@ -214,7 +216,10 @@ void AnnotatedDataLayer<Dtype>::load_batch(Batch<Dtype>* batch) {
214216
// Store transformed annotation.
215217
map<int, vector<AnnotationGroup> > all_anno;
216218
int num_bboxes = 0;
217-
219+
vector<Dtype*> top_label;
220+
for (int i = 0; i < sides_.size(); ++i) {
221+
top_label.push_back(batch->multi_label_[i]->mutable_cpu_data());
222+
}
218223
for (int item_id = 0; item_id < batch_size; ++item_id) {
219224
timer.Start();
220225
// get a anno_datum
@@ -333,47 +338,47 @@ void AnnotatedDataLayer<Dtype>::load_batch(Batch<Dtype>* batch) {
333338
for (int i = 0; i < sides_.size(); ++i) {
334339
side = sides_[i];
335340
count = sides_[i] * sides_[i] * (1 + 1 + 1 + 4);
336-
}
337-
label_shape[0] = batch_size;
338-
label_shape[1] = count;
339-
batch->label_.Reshape(label_shape);
340-
Dtype* top_label = batch->label_.mutable_cpu_data();
341-
const vector<AnnotationGroup>& anno_vec = all_anno[item_id];
342-
label_offset = count * item_id;
343-
top_label = top_label + label_offset;
344-
int locations = pow(side, 2);
345-
CHECK_EQ(count, locations * 7) << "side and count not match";
346-
// difficult
347-
caffe_set(locations, Dtype(0), top_label);
348-
// isobj
349-
caffe_set(locations, Dtype(0), top_label + locations);
350-
// class label
351-
caffe_set(locations, Dtype(-1), top_label + locations * 2);
352-
// bounding box
353-
caffe_set(locations*4, Dtype(0), top_label + locations * 3);
354-
for (int g = 0; g < anno_vec.size(); ++g) {
355-
const AnnotationGroup& anno_group = anno_vec[g];
356-
for (int a = 0; a < anno_group.annotation_size(); ++a) {
357-
const Annotation& anno = anno_group.annotation(a);
358-
const NormalizedBBox& bbox = anno.bbox();
359-
float class_label = anno_group.group_label();
360-
float x = bbox.x_center();
361-
float y = bbox.y_center();
362-
int x_index = floor(x * side);
363-
int y_index = floor(y * side);
364-
x_index = std::min(x_index, side - 1);
365-
y_index = std::min(y_index, side - 1);
366-
int dif_index = side * y_index + x_index;
367-
int obj_index = locations + dif_index;
368-
int class_index = locations * 2 + dif_index;
369-
int cor_index = locations * 3 + dif_index * 4;
370-
top_label[dif_index] = bbox.difficult();
371-
top_label[obj_index] = 1;
372-
top_label[class_index] = class_label;
373-
top_label[cor_index + 0] = bbox.x_center();
374-
top_label[cor_index + 1] = bbox.y_center();
375-
top_label[cor_index + 2] = bbox.width();
376-
top_label[cor_index + 3] = bbox.height();
341+
label_shape[0] = batch_size;
342+
label_shape[1] = count;
343+
batch->multi_label_[i]->Reshape(label_shape);
344+
top_label[i] = batch->multi_label_[i]->mutable_cpu_data();
345+
const vector<AnnotationGroup>& anno_vec = all_anno[item_id];
346+
label_offset = count * item_id;
347+
top_label[i] = top_label[i] + label_offset;
348+
int locations = pow(side, 2);
349+
CHECK_EQ(count, locations * 7) << "side and count not match";
350+
// difficult
351+
caffe_set(locations, Dtype(0), top_label[i]);
352+
// isobj
353+
caffe_set(locations, Dtype(0), top_label[i] + locations);
354+
// class label
355+
caffe_set(locations, Dtype(-1), top_label[i] + locations * 2);
356+
// bounding box
357+
caffe_set(locations*4, Dtype(0), top_label[i] + locations * 3);
358+
for (int g = 0; g < anno_vec.size(); ++g) {
359+
const AnnotationGroup& anno_group = anno_vec[g];
360+
for (int a = 0; a < anno_group.annotation_size(); ++a) {
361+
const Annotation& anno = anno_group.annotation(a);
362+
const NormalizedBBox& bbox = anno.bbox();
363+
float class_label = anno_group.group_label();
364+
float x = bbox.x_center();
365+
float y = bbox.y_center();
366+
int x_index = floor(x * side);
367+
int y_index = floor(y * side);
368+
x_index = std::min(x_index, side - 1);
369+
y_index = std::min(y_index, side - 1);
370+
int dif_index = side * y_index + x_index;
371+
int obj_index = locations + dif_index;
372+
int class_index = locations * 2 + dif_index;
373+
int cor_index = locations * 3 + dif_index * 4;
374+
top_label[i][dif_index] = bbox.difficult();
375+
top_label[i][obj_index] = 1;
376+
top_label[i][class_index] = class_label;
377+
top_label[i][cor_index + 0] = bbox.x_center();
378+
top_label[i][cor_index + 1] = bbox.y_center();
379+
top_label[i][cor_index + 2] = bbox.width();
380+
top_label[i][cor_index + 3] = bbox.height();
381+
}
377382
}
378383
}
379384
}
@@ -401,7 +406,6 @@ void AnnotatedDataLayer<Dtype>::load_batch(Batch<Dtype>* batch) {
401406
//reader_.free().push(const_cast<AnnotatedDatum*>(&anno_datum));
402407
Next();
403408
}
404-
405409
// Store "rich" annotation if needed.
406410
if (this->output_labels_ && has_anno_type_) {
407411
if(anno_type_ == AnnotatedDatum_AnnotationType_BBOX) {

src/caffe/layers/base_data_layer.cpp

Lines changed: 36 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -56,15 +56,30 @@ void BasePrefetchingDataLayer<Dtype>::LayerSetUp(
5656
for (int i = 0; i < prefetch_.size(); ++i) {
5757
prefetch_[i]->data_.mutable_cpu_data();
5858
if (this->output_labels_) {
59-
prefetch_[i]->label_.mutable_cpu_data();
59+
if (this->box_label_) {
60+
this->top_size_ = top.size();
61+
for (int j = 0; j < top.size() - 1; ++j) {
62+
prefetch_[i]->multi_label_[j]->mutable_cpu_data();
63+
}
64+
}
65+
else {
66+
prefetch_[i]->label_.mutable_cpu_data();
67+
}
6068
}
6169
}
6270
#ifndef CPU_ONLY
6371
if (Caffe::mode() == Caffe::GPU) {
6472
for (int i = 0; i < prefetch_.size(); ++i) {
6573
prefetch_[i]->data_.mutable_gpu_data();
6674
if (this->output_labels_) {
67-
prefetch_[i]->label_.mutable_gpu_data();
75+
if (this->box_label_) {
76+
for (int j = 0; j < top.size() - 1; ++j) {
77+
prefetch_[i]->multi_label_[j]->mutable_gpu_data();
78+
}
79+
}
80+
else {
81+
prefetch_[i]->label_.mutable_gpu_data();
82+
}
6883
}
6984
}
7085
}
@@ -92,7 +107,14 @@ void BasePrefetchingDataLayer<Dtype>::InternalThreadEntry() {
92107
if (Caffe::mode() == Caffe::GPU) {
93108
batch->data_.data().get()->async_gpu_push(stream);
94109
if (this->output_labels_) {
95-
batch->label_.data().get()->async_gpu_push(stream);
110+
if(this->box_label_) {
111+
for (int j = 0; j < this->top_size_ - 1; ++j) {
112+
batch->multi_label_[j]->data().get()->async_gpu_push(stream);
113+
}
114+
}
115+
else {
116+
batch->label_.data().get()->async_gpu_push(stream);
117+
}
96118
}
97119
CUDA_CHECK(cudaStreamSynchronize(stream));
98120
}
@@ -120,9 +142,17 @@ void BasePrefetchingDataLayer<Dtype>::Forward_cpu(
120142
top[0]->ReshapeLike(prefetch_current_->data_);
121143
top[0]->set_cpu_data(prefetch_current_->data_.mutable_cpu_data());
122144
if (this->output_labels_) {
123-
// Reshape to loaded labels.
124-
top[1]->ReshapeLike(prefetch_current_->label_);
125-
top[1]->set_cpu_data(prefetch_current_->label_.mutable_cpu_data());
145+
if(this->box_label_) {
146+
for (int j = 0; j < top.size() - 1; ++j) {
147+
top[j+1]->ReshapeLike(*(prefetch_current_->multi_label_[j]));
148+
top[j+1]->set_cpu_data(prefetch_current_->multi_label_[j]->mutable_cpu_data());
149+
}
150+
}
151+
else{
152+
// Reshape to loaded labels.
153+
top[1]->ReshapeLike(prefetch_current_->label_);
154+
top[1]->set_cpu_data(prefetch_current_->label_.mutable_cpu_data());
155+
}
126156
}
127157
}
128158

src/caffe/layers/base_data_layer.cu

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -15,9 +15,17 @@ void BasePrefetchingDataLayer<Dtype>::Forward_gpu(
1515
top[0]->ReshapeLike(prefetch_current_->data_);
1616
top[0]->set_gpu_data(prefetch_current_->data_.mutable_gpu_data());
1717
if (this->output_labels_) {
18-
// Reshape to loaded labels.
19-
top[1]->ReshapeLike(prefetch_current_->label_);
20-
top[1]->set_gpu_data(prefetch_current_->label_.mutable_gpu_data());
18+
if(this->box_label_) {
19+
for (int j = 0; j < top.size() - 1; ++j) {
20+
top[j+1]->ReshapeLike(*(prefetch_current_->multi_label_[j]));
21+
top[j+1]->set_gpu_data(prefetch_current_->multi_label_[j]->mutable_gpu_data());
22+
}
23+
}
24+
else {
25+
// Reshape to loaded labels.
26+
top[1]->ReshapeLike(prefetch_current_->label_);
27+
top[1]->set_gpu_data(prefetch_current_->label_.mutable_gpu_data());
28+
}
2129
}
2230
}
2331

src/caffe/layers/yolo_v2_loss_layer.cpp

Lines changed: 0 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -68,15 +68,6 @@ namespace caffe {
6868
return x;
6969
}
7070

71-
template <typename Dtype>
72-
Dtype Calc_rmse(const Box<Dtype>& truth, const Box<Dtype>& box, Dtype &coord_loss, Dtype &area_loss, float scale) {
73-
float coord_ = scale * (abs(box.x-truth.x) + abs(box.y-truth.y));
74-
float area_ = scale * (abs(box.w-truth.w) + abs(box.h-truth.h));
75-
coord_loss += coord_;
76-
area_loss += area_;
77-
return (coord_ + area_);
78-
}
79-
8071
template <typename Dtype>
8172
float delta_region_box(Box<Dtype>& truth_box, Dtype* input,
8273
std::vector<float>& biases, int n, int side, Dtype* diff,

0 commit comments

Comments
 (0)