@@ -32,18 +32,20 @@ def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq):
32
32
targets = [{k : v .to (device ) for k , v in t .items ()} for t in targets ]
33
33
loss_dict = model (images , targets )
34
34
35
+ # log only from the main process
35
36
# ### OUR CODE ###
36
- # let's track the losses here by adding scalars
37
- tensorboard .logger .add_scalar_dict (
38
- # passing the dictionary of losses (pairs - loss_key: loss_value)
39
- loss_dict ,
40
- # passing the global step (number of iterations)
41
- global_step = tensorboard .global_iter ,
42
- # adding the tag to combine plots in a subgroup
43
- tag = "loss"
44
- )
45
- # incrementing the global step (number of iterations)
46
- tensorboard .global_iter += 1
37
+ if utils .is_main_process ():
38
+ # let's track the losses here by adding scalars
39
+ tensorboard .logger .add_scalar_dict (
40
+ # passing the dictionary of losses (pairs - loss_key: loss_value)
41
+ loss_dict ,
42
+ # passing the global step (number of iterations)
43
+ global_step = tensorboard .global_iter ,
44
+ # adding the tag to combine plots in a subgroup
45
+ tag = "loss"
46
+ )
47
+ # incrementing the global step (number of iterations)
48
+ tensorboard .global_iter += 1
47
49
# ### END OF OUR CODE ###
48
50
49
51
losses = sum (loss for loss in loss_dict .values ())
@@ -109,25 +111,27 @@ def evaluate(model, data_loader, device):
109
111
model_time = time .time ()
110
112
outputs = model (images )
111
113
114
+ # log only from the main process
112
115
# ### OUR CODE ###
113
- # let's track bounding box and labels predictions for the first 50 images
114
- # as we hardly want to track all validation images
115
- # but want to see how the predicted bounding boxes and labels are changing during the process
116
- if i < 50 :
117
- # let's add tracking images with predicted bounding boxes
118
- tensorboard .logger .add_image_with_boxes (
119
- # adding pred_images tag to combine images in one subgroup
120
- "pred_images/PD-{}" .format (i ),
121
- # passing image tensor
122
- img ,
123
- # passing predicted bounding boxes
124
- outputs [0 ]["boxes" ].cpu (),
125
- # mapping & passing predicted labels
126
- labels = [
127
- tensorboard .COCO_INSTANCE_CATEGORY_NAMES [i ]
128
- for i in outputs [0 ]["labels" ].cpu ().numpy ()
129
- ],
130
- )
116
+ if utils .is_main_process ():
117
+ # let's track bounding box and labels predictions for the first 50 images
118
+ # since we do not want to track every validation image,
119
+ # but we do want to see how the predicted bounding boxes and labels change during the process
120
+ if i < 50 :
121
+ # let's add tracking images with predicted bounding boxes
122
+ tensorboard .logger .add_image_with_boxes (
123
+ # adding pred_images tag to combine images in one subgroup
124
+ "pred_images/PD-{}" .format (i ),
125
+ # passing image tensor
126
+ img ,
127
+ # passing predicted bounding boxes
128
+ outputs [0 ]["boxes" ].cpu (),
129
+ # mapping & passing predicted labels
130
+ labels = [
131
+ tensorboard .COCO_INSTANCE_CATEGORY_NAMES [i ]
132
+ for i in outputs [0 ]["labels" ].cpu ().numpy ()
133
+ ],
134
+ )
131
135
# ### END OUR CODE ###
132
136
outputs = [{k : v .to (cpu_device ) for k , v in t .items ()} for t in outputs ]
133
137
model_time = time .time () - model_time
@@ -144,7 +148,9 @@ def evaluate(model, data_loader, device):
144
148
coco_evaluator .synchronize_between_processes ()
145
149
146
150
# accumulate predictions from all images
147
- coco_evaluator .accumulate ()
148
- coco_evaluator .summarize ()
151
+ # in multi-GPU training (torch.distributed), only the main process accumulates and summarizes
152
+ if utils .is_main_process ():
153
+ coco_evaluator .accumulate ()
154
+ coco_evaluator .summarize ()
149
155
torch .set_num_threads (n_threads )
150
156
return coco_evaluator
0 commit comments