Skip to content

Commit c27db90

Browse files
SowmyaDhanapalvera121
authored and committed
Caffe Yolo v2 training patch (#3)
* Add Yolo-v2 training support * Fix XML parser and update function declaration Generalize XML parsing for VOC2007 and VOC2012 datasets to avoid updating difficult label in bounding box information without reading from its corresponding XML file. Update function declaration based on suggestion. * Comment STUB_GPU(YoloV2LossLayer) Since there is no GPU implementation for YoloV2LossLayer, commented the STUB_GPU call to avoid build issues
1 parent b274bc6 commit c27db90

17 files changed

+1427
-95
lines changed

data/VOC0712/create_data.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ cd $root_dir
66
redo=1
77
data_root_dir="$HOME/data/VOCdevkit"
88
dataset_name="VOC0712"
9-
mapfile="$root_dir/data/$dataset_name/labelmap_voc.prototxt"
9+
mapfile="$root_dir/data/$dataset_name/labelmap_yolo_voc.prototxt"
1010
anno_type="detection"
1111
db="lmdb"
1212
min_dim=0
@@ -21,5 +21,5 @@ then
2121
fi
2222
for subset in test trainval
2323
do
24-
python $root_dir/scripts/create_annoset.py --anno-type=$anno_type --label-map-file=$mapfile --min-dim=$min_dim --max-dim=$max_dim --resize-width=$width --resize-height=$height --check-label $extra_cmd $data_root_dir $root_dir/data/$dataset_name/$subset.txt $data_root_dir/$dataset_name/$db/$dataset_name"_"$subset"_"$db examples/$dataset_name
24+
python $root_dir/scripts/create_annoset.py --anno-type=$anno_type --label-map-file=$mapfile --min-dim=$min_dim --max-dim=$max_dim --resize-width=$width --resize-height=$height --check-label --caffe-yolo $extra_cmd $data_root_dir $root_dir/data/$dataset_name/$subset.txt $data_root_dir/$dataset_name/$db/$dataset_name"_"$subset"_"$db examples/$dataset_name
2525
done

include/caffe/data_transformer.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,9 @@ class DataTransformer {
258258
*/
259259
void Transform(const Datum& datum, Blob<Dtype>* transformed_blob,
260260
NormalizedBBox* crop_bbox, bool* do_mirror);
261-
261+
void Transform_Yolo(const AnnotatedDatum& anno_datum,
262+
Blob<Dtype>* transformed_blob, NormalizedBBox* crop_bbox,
263+
RepeatedPtrField<AnnotationGroup>* transformed_anno_group_all);
262264
// Transformation parameters
263265
TransformationParameter param_;
264266

include/caffe/layers/annotated_data_layer.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ class AnnotatedDataLayer : public BasePrefetchingDataLayer<Dtype> {
4040
AnnotatedDatum_AnnotationType anno_type_;
4141
vector<BatchSampler> batch_samplers_;
4242
string label_map_file_;
43+
vector<int> sides_;
4344
};
4445

4546
} // namespace caffe
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
#ifndef CAFFE_REORG_LAYER_HPP_
2+
#define CAFFE_REORG_LAYER_HPP_
3+
4+
#include <vector>
5+
6+
#include "caffe/blob.hpp"
7+
#include "caffe/layer.hpp"
8+
#include "caffe/proto/caffe.pb.h"
9+
10+
namespace caffe {
11+
12+
/*
13+
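* @brief Rearranges the input Blob between its channel and spatial
* dimensions by a fixed stride (the "Reorg" layer used by YOLOv2).
14+
*
15+
* Note: element values are not modified, only moved. The reorg_cpu helpers in
* this header copy each element to a permuted index, so, unlike FlattenLayer,
16+
* the output presumably cannot just alias the input through Blob::ShareData and Blob::ShareDiff (confirm against the .cpp).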
17+
*/
18+
// Layer registered under the type name "Reorg". It takes exactly one bottom
// blob and produces exactly one top blob. Only the interface is declared in
// this header; every non-inline method is defined elsewhere.
template<typename Dtype>
19+
class ReorgLayer : public Layer<Dtype> {
20+
public:
21+
explicit ReorgLayer(const LayerParameter &param)
22+
: Layer<Dtype>(param) {}
23+
24+
// One-time setup hook (definition not in this header).
// NOTE(review): presumably reads stride_ / reverse_ from the layer parameter -- confirm in the .cpp.
virtual void LayerSetUp(const vector<Blob<Dtype> *> &bottom,
25+
const vector<Blob<Dtype> *> &top);
26+
27+
// Shape hook (definition not in this header).
// NOTE(review): presumably fills the reorged_* members and sizes the top blob -- confirm in the .cpp.
virtual void Reshape(const vector<Blob<Dtype> *> &bottom,
28+
const vector<Blob<Dtype> *> &top);
29+
30+
// Type string reported for this layer.
virtual inline const char *type() const { return "Reorg"; }
31+
32+
// Exactly one input blob is required.
virtual inline int ExactNumBottomBlobs() const { return 1; }
33+
34+
// Exactly one output blob is produced.
virtual inline int ExactNumTopBlobs() const { return 1; }
35+
36+
protected:
37+
38+
39+
// CPU forward pass (definition not in this header).
virtual void Forward_cpu(const vector<Blob<Dtype> *> &bottom,
40+
const vector<Blob<Dtype> *> &top);
41+
42+
// CPU backward pass (definition not in this header).
virtual void Backward_cpu(const vector<Blob<Dtype> *> &top,
43+
const vector<bool> &propagate_down, const vector<Blob<Dtype> *> &bottom);
44+
45+
// GPU forward pass: declared here, so a definition (or stub) must exist elsewhere.
virtual void Forward_gpu(const vector<Blob<Dtype> *> &bottom,
46+
const vector<Blob<Dtype> *> &top);
47+
48+
// GPU backward pass: declared here, so a definition (or stub) must exist elsewhere.
virtual void Backward_gpu(const vector<Blob<Dtype> *> &top,
49+
const vector<bool> &propagate_down, const vector<Blob<Dtype> *> &bottom);
50+
51+
// NOTE(review): the meanings below are inferred from the member names only -- confirm in the .cpp.
// Presumably the spatial stride handed to reorg_cpu.
int stride_;
52+
// Presumably selects the direction of the rearrangement.
bool reverse_;
53+
// Presumably the bottom blob's batch size.
int batch_num_;
54+
// Presumably the bottom blob's channel count.
int channels_;
55+
// Presumably the top blob's channel count after the reorg.
int reorged_channels_;
56+
// Presumably the bottom blob's spatial size.
int height_, width_;
57+
// Presumably the top blob's spatial size after the reorg.
int reorged_height_, reorged_width_;
58+
// Scratch blob; its use is not visible from this header.
Blob<Dtype> diff_;
59+
};
60+
/**
 * @brief Copies a dense (batch, channel, height, width) buffer through the
 *        "reorg" index permutation.
 *
 * Every element is addressed twice: by its plain index in a
 * (batch, c, h, w) layout, and by a permuted index in a
 * (batch, c / stride^2, h * stride, w * stride) layout. Both layouts hold the
 * same number of elements when c is a multiple of stride * stride.
 *
 * @param x       Source buffer (read only, despite the non-const pointer).
 * @param w       Width of the plain layout.
 * @param h       Height of the plain layout.
 * @param c       Channel count of the plain layout; should be a multiple of
 *                stride * stride, otherwise permuted indices collide.
 * @param batch   Number of items in the batch.
 * @param stride  Reorg stride; must be positive.
 * @param forward Non-zero: out[permuted] = x[plain].
 *                Zero:     out[plain]    = x[permuted] (the inverse copy).
 * @param out     Destination buffer, same element count as x.
 *
 * Invalid geometry (stride <= 0, or c < stride * stride so the permuted
 * channel count is zero) previously caused a division or modulo by zero.
 * The function now returns without touching out in those cases, so callers
 * should validate their parameters beforehand.
 */
template<typename Dtype>
void reorg_cpu(Dtype *x, int w, int h, int c, int batch, int stride, int forward, Dtype *out)
{
  // Guard the divisions below: stride * stride and out_c are both divisors.
  if (stride <= 0) return;
  const int out_c = c / (stride * stride);
  if (out_c <= 0) return;

  const int out_w = w * stride;
  const int out_h = h * stride;

  for (int b = 0; b < batch; ++b) {
    for (int k = 0; k < c; ++k) {
      // These depend only on the channel, so compute them once per channel.
      const int c2 = k % out_c;
      const int offset = k / out_c;
      const int dw = offset % stride;
      const int dh = offset / stride;
      for (int j = 0; j < h; ++j) {
        const int h2 = j * stride + dh;
        for (int i = 0; i < w; ++i) {
          const int in_index = i + w * (j + h * (k + c * b));
          const int w2 = i * stride + dw;
          const int out_index = w2 + out_w * (h2 + out_h * (c2 + out_c * b));
          if (forward) out[out_index] = x[in_index];
          else out[in_index] = x[out_index];
        }
      }
    }
  }
}
83+
84+
/**
 * @brief Const-correct overload of the "reorg" index permutation copy.
 *
 * The bottom layout is (b_n, b_c, b_h, b_w). The permuted layout is
 * (b_n, b_c / stride^2, b_h * stride, b_w * stride). Both hold the same
 * number of elements when b_c is a multiple of stride * stride.
 *
 * @param bottom_data Source buffer.
 * @param b_w         Width of the bottom layout.
 * @param b_h         Height of the bottom layout.
 * @param b_c         Channel count of the bottom layout; should be a multiple
 *                    of stride * stride, otherwise permuted indices collide.
 * @param b_n         Number of items in the batch.
 * @param stride      Reorg stride; must be positive.
 * @param forward     true:  top_data[permuted] = bottom_data[plain].
 *                    false: top_data[plain]    = bottom_data[permuted].
 * @param top_data    Destination buffer, same element count as bottom_data.
 *
 * Invalid geometry (stride <= 0, or b_c < stride * stride so the permuted
 * channel count is zero) previously caused a division or modulo by zero.
 * The function now returns without touching top_data in those cases, so
 * callers should validate their parameters beforehand.
 */
template<typename Dtype>
void reorg_cpu(const Dtype *bottom_data, const int b_w, const int b_h,
               const int b_c, const int b_n, const int stride,
               const bool forward, Dtype *top_data) {
  // Guard the divisions below: stride * stride and t_c are both divisors.
  if (stride <= 0) return;
  const int t_c = b_c / (stride * stride);
  if (t_c <= 0) return;

  const int t_w = b_w * stride;
  const int t_h = b_h * stride;

  for (int n = 0; n < b_n; ++n) {
    for (int c = 0; c < b_c; ++c) {
      // These depend only on the channel, so compute them once per channel.
      const int c2 = c % t_c;
      const int offset = c / t_c;
      const int dw = offset % stride;
      const int dh = offset / stride;
      for (int h = 0; h < b_h; ++h) {
        const int h2 = h * stride + dh;
        for (int w = 0; w < b_w; ++w) {
          const int bottom_index = w + b_w * (h + b_h * (c + b_c * n));
          const int w2 = w * stride + dw;
          const int top_index = w2 + t_w * (h2 + t_h * (c2 + t_c * n));
          if (forward) {
            top_data[top_index] = bottom_data[bottom_index];
          } else {
            top_data[bottom_index] = bottom_data[top_index];
          }
        }
      }
    }
  }
}
109+
110+
111+
} // namespace caffe
112+
113+
#endif // CAFFE_REORG_LAYER_HPP_
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
#ifndef YOLOV2LOSSLAYER_H
2+
#define YOLOV2LOSSLAYER_H
3+
4+
#include <vector>
5+
#include <google/protobuf/repeated_field.h>
6+
7+
#include "caffe/blob.hpp"
8+
#include "caffe/layer.hpp"
9+
#include "caffe/proto/caffe.pb.h"
10+
11+
#include "caffe/layers/loss_layer.hpp"
12+
13+
namespace caffe {
14+
template <typename Dtype>
15+
Dtype Overlap(Dtype x1, Dtype w1, Dtype x2, Dtype w2);
16+
template <typename Dtype>
17+
Dtype Calc_iou(const std::vector<Dtype>& box, const std::vector<Dtype>& truth);
18+
template <typename Dtype>
19+
Dtype Calc_rmse(const std::vector<Dtype>& box, const std::vector<Dtype>& truth);
20+
21+
// Loss layer registered under the type name "YoloV2Loss". Only the interface
// is declared in this header; the method bodies are defined elsewhere. Only
// CPU forward/backward hooks are declared -- there are no *_gpu overrides here.
template<typename Dtype>
22+
class YoloV2LossLayer: public LossLayer<Dtype> {
23+
public:
24+
// Forwards param to LossLayer and default-constructs diff_.
// NOTE(review): no other member (including `seen`) is initialised here, so
// they hold indeterminate values unless LayerSetUp assigns them -- confirm.
explicit YoloV2LossLayer(const LayerParameter& param)
25+
: LossLayer<Dtype>(param), diff_() {}
26+
27+
// One-time setup hook (definition not in this header).
// NOTE(review): presumably loads the loss hyper-parameters below -- confirm in the .cpp.
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
28+
const vector<Blob<Dtype>*>& top);
29+
30+
// Shape hook (definition not in this header).
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
31+
const vector<Blob<Dtype>*>& top);
32+
33+
// Type string reported for this layer.
virtual inline const char* type() const { return "YoloV2Loss"; }
34+
35+
protected:
36+
// CPU forward pass (definition not in this header).
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
37+
const vector<Blob<Dtype>*>& top);
38+
// CPU backward pass (definition not in this header).
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
39+
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
40+
41+
// NOTE(review): the meanings below are inferred from the member names only -- confirm in the .cpp.
// Presumably the anchor-box priors.
vector<float> biases_;
42+
// Presumably a running count of processed samples; note it lacks the
// trailing underscore used by every other member of this class.
int seen;
43+
// Presumably the output grid size.
int side_;
44+
// Presumably the number of object classes.
int num_classes_;
45+
// Presumably the number of boxes predicted per grid cell.
int num_boxes_;
46+
// Presumably per-term weights of the loss.
float box_scale_;
47+
float class_scale_;
48+
float object_scale_;
49+
float noobject_scale_;
50+
// Behaviour switches; their semantics are not visible from this header.
bool rescore_;
51+
bool constraint_;
52+
// Presumably an IoU/confidence threshold.
float thresh_;
53+
54+
// Scratch blob, default-constructed by the constructor above.
Blob<Dtype> diff_;
55+
};
56+
}
57+
58+
#endif // YOLOV2LOSSLAYER_H

include/caffe/util/io.hpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -209,22 +209,24 @@ bool ReadRichImageToAnnotatedDatum(const string& filename,
209209
const string& labelname, const int height, const int width,
210210
const int min_dim, const int max_dim, const bool is_color,
211211
const std::string& encoding, const AnnotatedDatum_AnnotationType type,
212-
const string& labeltype, const std::map<string, int>& name_to_label,
213-
AnnotatedDatum* anno_datum);
212+
const string& labeltype, const std::map<string,
213+
int>& name_to_label, AnnotatedDatum* anno_datum,
214+
const bool caffe_yolo = false);
214215

215216
inline bool ReadRichImageToAnnotatedDatum(const string& filename,
216217
const string& labelname, const int height, const int width,
217218
const bool is_color, const std::string & encoding,
218219
const AnnotatedDatum_AnnotationType type, const string& labeltype,
219-
const std::map<string, int>& name_to_label, AnnotatedDatum* anno_datum) {
220+
const std::map<string, int>& name_to_label, AnnotatedDatum* anno_datum,
221+
const bool caffe_yolo = false) {
220222
return ReadRichImageToAnnotatedDatum(filename, labelname, height, width, 0, 0,
221-
is_color, encoding, type, labeltype, name_to_label,
222-
anno_datum);
223+
is_color, encoding, type, labeltype,
224+
name_to_label, anno_datum, caffe_yolo);
223225
}
224226

225227
bool ReadXMLToAnnotatedDatum(const string& labelname, const int img_height,
226228
const int img_width, const std::map<string, int>& name_to_label,
227-
AnnotatedDatum* anno_datum);
229+
AnnotatedDatum* anno_datum, const bool caffe_yolo = false);
228230

229231
bool ReadJSONToAnnotatedDatum(const string& labelname, const int img_height,
230232
const int img_width, const std::map<string, int>& name_to_label,
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
#ifndef YOLO_PREPROCESS_HPP
2+
#define YOLO_PREPROCESS_HPP
3+
4+
#ifdef USE_OPENCV
5+
#include <opencv2/core/core.hpp>
6+
#include <opencv2/imgproc/imgproc.hpp>
7+
#endif // USE_OPENCV
8+
9+
#include "caffe/common.hpp"
10+
#include "caffe/proto/caffe.pb.h"
11+
12+
namespace caffe {
13+
14+
float rand_uniform(float min, float max);
15+
16+
float rand_scale(float value_lower, float value_upper);
17+
18+
void set_pixel(float *m, int w, int h, int ch, int x, int y, int c, float val);
19+
20+
void set_pixel_with_scaling(float *m, int w, int h, int ch, int x, int y, int c,
21+
float val, float scale);
22+
23+
float get_pixel(float *m, int w, int h, int ch, int x, int y, int c);
24+
25+
void scale_image_channel(float *im, int w, int h, int ch, int c, float v);
26+
27+
float three_way_max(float a, float b, float c);
28+
29+
float three_way_min(float a, float b, float c);
30+
31+
void rgb_to_hsv(float *im, int width, int height, int channels);
32+
33+
void hsv_to_rgb(float *im, int width, int height, int channels);
34+
35+
void constrain_image(float *im, int w, int h, int c);
36+
37+
float constrain(float min, float max, float a);
38+
39+
void distort_image(float *im, int w, int h, int c, float hue, float sat,
40+
float val);
41+
42+
void random_distort_image(float *im, int w, int h, int c, float hue,
43+
float saturation_lower, float saturation_upper,
44+
float exposure_lower, float exposure_upper);
45+
46+
void flip_image(float *im, int w, int h, int c);
47+
48+
#ifdef USE_OPENCV
49+
void bgr_to_rgb(cv::Mat im);
50+
51+
cv::Mat hwc_to_chw(cv::Mat im);
52+
53+
float get_pixel_image(const cv::Mat& m, int x, int y, int c);
54+
55+
float get_pixel_extend(const cv::Mat& m, int x, int y, int c);
56+
57+
float bilinear_interpolate(const cv::Mat& im, float x, float y, int c);
58+
59+
void place_image(cv::Mat im, int w, int h, int dx, int dy, float *resized_image,
60+
int resize_w, int resize_h, float scale);
61+
#endif // USE_OPENCV
62+
63+
} // namespace caffe
64+
65+
#endif // YOLO_PREPROCESS_HPP

scripts/create_annoset.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,8 @@
4747
help="Randomly shuffle the order of images and their labels.")
4848
parser.add_argument("--check-label", default = False, action = "store_true",
4949
help="Check that there is no duplicated name/label.")
50-
50+
parser.add_argument("--caffe-yolo", default = False, action = "store_true",
51+
help="Generate data to train converted Yolo model on Caffe.")
5152
args = parser.parse_args()
5253
root_dir = args.root
5354
list_file = args.listfile
@@ -69,6 +70,7 @@
6970
resize_width = args.resize_width
7071
shuffle = args.shuffle
7172
check_label = args.check_label
73+
caffe_yolo = args.caffe_yolo
7274

7375
# check if root directory exists
7476
if not os.path.exists(root_dir):
@@ -134,10 +136,11 @@
134136
" --encode_type={}" \
135137
" --encoded={}" \
136138
" --gray={}" \
139+
" --caffe_yolo={}" \
137140
" {} {} {}" \
138141
.format(caffe_root, anno_type, label_type, label_map_file, check_label,
139142
min_dim, max_dim, resize_height, resize_width, backend, shuffle,
140-
check_size, encode_type, encoded, gray, root_dir, list_file, out_dir)
143+
check_size, encode_type, encoded, gray, caffe_yolo, root_dir, list_file, out_dir)
141144
elif anno_type == "classification":
142145
cmd = "{}/build/tools/convert_annoset" \
143146
" --anno_type={}" \

0 commit comments

Comments
 (0)