Skip to content

Commit c308c03

Browse files
MehdiShzTaylor Robie
authored andcommitted
Mask R-CNN model added to models/research/mlperf_object_detection/Mask_RCNN (#4678)
* Create README.md * readme changed * readme changed * ResNet backbone completed. * FPN added * Create README.md * initial commit * files removed * initial commit * protobuf file removed
1 parent 32e7d66 commit c308c03

File tree

370 files changed

+89836
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

370 files changed

+89836
-0
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Mask R-CNN implementation adopted from models/research/object_detection/
Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
# Mask R-CNN with Resnet-50 (v1), Atrous version
2+
# Configured for MSCOCO Dataset.
3+
# Users should configure the fine_tune_checkpoint field in the train config as
4+
# well as the label_map_path and input_path fields in the train_input_reader and
5+
# eval_input_reader. NOTE: this file contains hard-coded absolute paths (under
6+
# /home/mehdisharif/...) rather than "PATH_TO_BE_CONFIGURED" placeholders; edit them for your environment.
7+
8+
model {
9+
faster_rcnn {
10+
num_classes: 81
11+
image_resizer {
12+
keep_aspect_ratio_resizer {
13+
min_dimension: 800
14+
max_dimension: 1365
15+
}
16+
}
17+
number_of_stages: 3
18+
feature_extractor {
19+
type: 'faster_rcnn_resnet50'
20+
first_stage_features_stride: 8
21+
}
22+
first_stage_anchor_generator {
23+
grid_anchor_generator {
24+
scales: [0.125, 0.25, 0.5, 1.0, 2.0] # base size=256**2 => anchor sizes=32 64 128 256 512
25+
aspect_ratios: [0.5, 1.0, 2.0]
26+
height_stride: 8
27+
width_stride: 8
28+
}
29+
}
30+
first_stage_atrous_rate: 2
31+
first_stage_box_predictor_conv_hyperparams {
32+
op: CONV
33+
regularizer {
34+
l2_regularizer {
35+
weight: 0.0
36+
}
37+
}
38+
initializer {
39+
truncated_normal_initializer {
40+
stddev: 0.01
41+
}
42+
}
43+
}
44+
first_stage_nms_score_threshold: 0.0
45+
first_stage_nms_iou_threshold: 0.7
46+
first_stage_max_proposals: 512
47+
first_stage_localization_loss_weight: 2.0
48+
first_stage_objectness_loss_weight: 1.0
49+
initial_crop_size: 14
50+
maxpool_kernel_size: 2
51+
maxpool_stride: 2
52+
second_stage_batch_size: 512
53+
second_stage_box_predictor {
54+
mask_rcnn_box_predictor {
55+
use_dropout: false
56+
dropout_keep_probability: 1.0
57+
predict_instance_masks: true
58+
mask_height: 14
59+
mask_width: 14
60+
mask_prediction_conv_depth: 0
61+
mask_prediction_num_conv_layers: 3 #from mask rcnn heads
62+
fc_hyperparams {
63+
op: FC
64+
regularizer {
65+
l2_regularizer {
66+
weight: 0.0
67+
}
68+
}
69+
initializer {
70+
variance_scaling_initializer {
71+
factor: 1.0
72+
uniform: true
73+
mode: FAN_AVG
74+
}
75+
}
76+
}
77+
conv_hyperparams {
78+
op: CONV
79+
regularizer {
80+
l2_regularizer {
81+
weight: 0.0
82+
}
83+
}
84+
initializer {
85+
truncated_normal_initializer {
86+
stddev: 0.01
87+
}
88+
}
89+
}
90+
}
91+
}
92+
second_stage_post_processing {
93+
batch_non_max_suppression {
94+
score_threshold: 0.0
95+
iou_threshold: 0.6
96+
max_detections_per_class: 2000
97+
max_total_detections: 2000
98+
}
99+
score_converter: SOFTMAX
100+
}
101+
second_stage_localization_loss_weight: 2.0
102+
second_stage_classification_loss_weight: 1.0
103+
second_stage_mask_prediction_loss_weight: 4.0
104+
}
105+
}
106+
107+
train_config: {
108+
batch_size: 4
109+
optimizer {
110+
momentum_optimizer: {
111+
learning_rate: {
112+
manual_step_learning_rate {
113+
initial_learning_rate: 0.01
114+
schedule {
115+
step: 120000
116+
learning_rate: .001
117+
}
118+
schedule {
119+
step: 160000
120+
learning_rate: .0001
121+
}
122+
}
123+
}
124+
momentum_optimizer_value: 0.9
125+
}
126+
use_moving_average: false
127+
}
128+
gradient_clipping_by_norm: 10.0
129+
#fine_tune_checkpoint: "/home/mehdisharif/data/coco/resnet_v1_50.ckpt"
130+
#from_detection_checkpoint: True
131+
# Note: The below line limits the training process to 20M steps (num_steps:
132+
# 20000000). This is far beyond the last learning rate schedule step (160K),
133+
# so the learning rate schedule is applied in full before training stops.
134+
# Remove the below line to train indefinitely.
135+
num_steps: 20000000
136+
data_augmentation_options {
137+
random_horizontal_flip {
138+
}
139+
}
140+
}
141+
142+
train_input_reader: {
143+
tf_record_input_reader {
144+
input_path: "/home/mehdisharif/data/coco/output2017/coco_train.record"
145+
}
146+
label_map_path: "/home/mehdisharif/data/coco/output2017/mscoco_label_map.pbtxt"
147+
load_instance_masks: true
148+
mask_type: PNG_MASKS
149+
}
150+
151+
eval_config: {
152+
metrics_set: ['coco_detection_metrics', 'coco_mask_metrics']
153+
num_examples: 50
154+
# Note: The below line limits the evaluation process to 1 evaluation.
155+
# Remove the below line to evaluate indefinitely.
156+
max_evals: 1
157+
num_visualizations: 50
158+
eval_interval_secs: 120
159+
}
160+
161+
eval_input_reader: {
162+
tf_record_input_reader {
163+
input_path: "/home/mehdisharif/data/coco/output2017/coco_val.record"
164+
}
165+
label_map_path: "/home/mehdisharif/data/coco/output2017/mscoco_label_map.pbtxt"
166+
load_instance_masks: true
167+
mask_type: PNG_MASKS
168+
shuffle: false
169+
num_readers: 1
170+
}
Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
# Mask R-CNN with Resnet-50 (v1), Atrous version
2+
# Configured for MSCOCO Dataset.
3+
# Users should configure the fine_tune_checkpoint field in the train config as
4+
# well as the label_map_path and input_path fields in the train_input_reader and
5+
# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
6+
# should be configured.
7+
8+
model {
9+
faster_rcnn {
10+
num_classes: 90
11+
image_resizer {
12+
keep_aspect_ratio_resizer {
13+
min_dimension: 800
14+
max_dimension: 1365
15+
}
16+
}
17+
number_of_stages: 3
18+
feature_extractor {
19+
type: 'faster_rcnn_resnet50'
20+
first_stage_features_stride: 8
21+
}
22+
first_stage_anchor_generator {
23+
grid_anchor_generator {
24+
scales: [0.25, 0.5, 1.0, 2.0]
25+
aspect_ratios: [0.5, 1.0, 2.0]
26+
height_stride: 8
27+
width_stride: 8
28+
}
29+
}
30+
first_stage_atrous_rate: 2
31+
first_stage_box_predictor_conv_hyperparams {
32+
op: CONV
33+
regularizer {
34+
l2_regularizer {
35+
weight: 0.0
36+
}
37+
}
38+
initializer {
39+
truncated_normal_initializer {
40+
stddev: 0.01
41+
}
42+
}
43+
}
44+
first_stage_nms_score_threshold: 0.0
45+
first_stage_nms_iou_threshold: 0.7
46+
first_stage_max_proposals: 300
47+
first_stage_localization_loss_weight: 2.0
48+
first_stage_objectness_loss_weight: 1.0
49+
initial_crop_size: 14
50+
maxpool_kernel_size: 2
51+
maxpool_stride: 2
52+
second_stage_box_predictor {
53+
mask_rcnn_box_predictor {
54+
use_dropout: false
55+
dropout_keep_probability: 1.0
56+
predict_instance_masks: true
57+
mask_height: 33
58+
mask_width: 33
59+
mask_prediction_conv_depth: 0
60+
mask_prediction_num_conv_layers: 4
61+
fc_hyperparams {
62+
op: FC
63+
regularizer {
64+
l2_regularizer {
65+
weight: 0.0
66+
}
67+
}
68+
initializer {
69+
variance_scaling_initializer {
70+
factor: 1.0
71+
uniform: true
72+
mode: FAN_AVG
73+
}
74+
}
75+
}
76+
conv_hyperparams {
77+
op: CONV
78+
regularizer {
79+
l2_regularizer {
80+
weight: 0.0
81+
}
82+
}
83+
initializer {
84+
truncated_normal_initializer {
85+
stddev: 0.01
86+
}
87+
}
88+
}
89+
}
90+
}
91+
second_stage_post_processing {
92+
batch_non_max_suppression {
93+
score_threshold: 0.0
94+
iou_threshold: 0.6
95+
max_detections_per_class: 100
96+
max_total_detections: 300
97+
}
98+
score_converter: SOFTMAX
99+
}
100+
second_stage_localization_loss_weight: 2.0
101+
second_stage_classification_loss_weight: 1.0
102+
second_stage_mask_prediction_loss_weight: 4.0
103+
}
104+
}
105+
106+
train_config: {
107+
batch_size: 2
108+
optimizer {
109+
momentum_optimizer: {
110+
learning_rate: {
111+
manual_step_learning_rate {
112+
initial_learning_rate: 0.0003
113+
schedule {
114+
step: 900000
115+
learning_rate: .00003
116+
}
117+
schedule {
118+
step: 1200000
119+
learning_rate: .000003
120+
}
121+
}
122+
}
123+
momentum_optimizer_value: 0.9
124+
}
125+
use_moving_average: false
126+
}
127+
gradient_clipping_by_norm: 10.0
128+
#fine_tune_checkpoint: ""
129+
from_detection_checkpoint: false
130+
# Note: Uncommenting the below line limits the training process to 200K steps.
131+
# That stops training before the first learning rate schedule step (900K), so
132+
# the learning rate would never decay. Leave the line commented out to train
133+
# indefinitely.
134+
#num_steps: 200000
135+
data_augmentation_options {
136+
random_horizontal_flip {
137+
}
138+
}
139+
}
140+
141+
train_input_reader: {
142+
tf_record_input_reader {
143+
input_path: "PATH_TO_BE_CONFIGURED/coco_train.record"
144+
}
145+
label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
146+
load_instance_masks: true
147+
mask_type: PNG_MASKS
148+
}
149+
150+
eval_config: {
151+
metrics_set: ['coco_detection_metrics', 'coco_mask_metrics']
152+
num_examples: 50
153+
# Note: The below line limits the evaluation process to 1 evaluation.
154+
# Remove the below line to evaluate indefinitely.
155+
max_evals: 1
156+
num_visualizations: 50
157+
eval_interval_secs: 120
158+
}
159+
160+
eval_input_reader: {
161+
tf_record_input_reader {
162+
input_path: "PATH_TO_BE_CONFIGURED/coco_val.record"
163+
}
164+
label_map_path: "PATH_TO_BE_CONFIGURED/mscoco_label_map.pbtxt"
165+
load_instance_masks: true
166+
mask_type: PNG_MASKS
167+
shuffle: true
168+
num_readers: 1
169+
}

0 commit comments

Comments
 (0)