|
88 | 88 | loss_cls=dict(
|
89 | 89 | type='mmdet.GaussianFocalLoss', reduction='mean', loss_weight=1.0),
|
90 | 90 | loss_bbox=dict(type='mmdet.L1Loss', reduction='mean', loss_weight=2.0),
|
| 91 | + loss_iou=dict(type='mmdet.L1Loss', reduction='sum', loss_weight=1.0), |
| 92 | + loss_reg_iou=dict( |
| 93 | + type='mmdet3d.DIoU3DLoss', reduction='mean', loss_weight=2.0), |
91 | 94 | norm_bbox=True),
|
92 | 95 | # model training and testing settings
|
93 | 96 | train_cfg=dict(
|
94 |
| - pts=dict( |
95 |
| - grid_size=grid_size, |
96 |
| - voxel_size=voxel_size, |
97 |
| - out_size_factor=4, |
98 |
| - dense_reg=1, |
99 |
| - gaussian_overlap=0.1, |
100 |
| - max_objs=500, |
101 |
| - min_radius=2, |
102 |
| - code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])), |
| 97 | + grid_size=grid_size, |
| 98 | + voxel_size=voxel_size, |
| 99 | + point_cloud_range=point_cloud_range, |
| 100 | + out_size_factor=1, |
| 101 | + dense_reg=1, |
| 102 | + gaussian_overlap=0.1, |
| 103 | + max_objs=500, |
| 104 | + min_radius=2, |
| 105 | + code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]), |
103 | 106 | test_cfg=dict(
|
104 | 107 | max_per_img=500,
|
105 | 108 | max_pool_nms=False,
|
106 | 109 | min_radius=[4, 12, 10, 1, 0.85, 0.175],
|
107 | 110 | iou_rectifier=[[0.68, 0.71, 0.65]],
|
108 | 111 | pc_range=[-80, -80],
|
109 |
| - out_size_factor=4, |
| 112 | + out_size_factor=1, |
110 | 113 | voxel_size=voxel_size[:2],
|
111 | 114 | nms_type='rotate',
|
112 | 115 | multi_class_nms=True,
|
|
128 | 131 | coord_type='LIDAR',
|
129 | 132 | load_dim=6,
|
130 | 133 | use_dim=[0, 1, 2, 3, 4],
|
| 134 | + norm_intensity=True, |
| 135 | + norm_elongation=True, |
131 | 136 | backend_args=backend_args),
|
132 | 137 | backend_args=backend_args)
|
133 | 138 |
|
|
138 | 143 | load_dim=6,
|
139 | 144 | use_dim=5,
|
140 | 145 | norm_intensity=True,
|
| 146 | + norm_elongation=True, |
141 | 147 | backend_args=backend_args),
|
142 |
| - # Add this if using `MultiFrameDeformableDecoderRPN` |
143 |
| - # dict( |
144 |
| - # type='LoadPointsFromMultiSweeps', |
145 |
| - # sweeps_num=9, |
146 |
| - # load_dim=6, |
147 |
| - # use_dim=[0, 1, 2, 3, 4], |
148 |
| - # pad_empty_sweeps=True, |
149 |
| - # remove_close=True), |
150 | 148 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
|
151 | 149 | dict(type='ObjectSample', db_sampler=db_sampler),
|
| 150 | + dict( |
| 151 | + type='RandomFlip3D', |
| 152 | + sync_2d=False, |
| 153 | + flip_ratio_bev_horizontal=0.5, |
| 154 | + flip_ratio_bev_vertical=0.5), |
152 | 155 | dict(
|
153 | 156 | type='GlobalRotScaleTrans',
|
154 | 157 | rot_range=[-0.78539816, 0.78539816],
|
155 | 158 | scale_ratio_range=[0.95, 1.05],
|
156 |
| - translation_std=[0.5, 0.5, 0]), |
157 |
| - dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), |
158 |
| - dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), |
159 |
| - dict(type='ObjectNameFilter', classes=class_names), |
| 159 | + translation_std=[0.5, 0.5, 0.5]), |
| 160 | + dict(type='PointsRangeFilter3D', point_cloud_range=point_cloud_range), |
| 161 | + dict(type='ObjectRangeFilter3D', point_cloud_range=point_cloud_range), |
160 | 162 | dict(type='PointShuffle'),
|
161 | 163 | dict(
|
162 | 164 | type='Pack3DDetInputs',
|
|
172 | 174 | norm_intensity=True,
|
173 | 175 | norm_elongation=True,
|
174 | 176 | backend_args=backend_args),
|
| 177 | + dict(type='PointsRangeFilter3D', point_cloud_range=point_cloud_range), |
175 | 178 | dict(
|
176 |
| - type='MultiScaleFlipAug3D', |
177 |
| - img_scale=(1333, 800), |
178 |
| - pts_scale_ratio=1, |
179 |
| - flip=False, |
180 |
| - transforms=[ |
181 |
| - dict( |
182 |
| - type='GlobalRotScaleTrans', |
183 |
| - rot_range=[0, 0], |
184 |
| - scale_ratio_range=[1., 1.], |
185 |
| - translation_std=[0, 0, 0]), |
186 |
| - dict(type='RandomFlip3D'), |
187 |
| - dict( |
188 |
| - type='PointsRangeFilter', point_cloud_range=point_cloud_range) |
189 |
| - ]), |
190 |
| - dict(type='Pack3DDetInputs', keys=['points']) |
| 179 | + type='Pack3DDetInputs', |
| 180 | + keys=['points'], |
| 181 | + meta_keys=['box_type_3d', 'sample_idx', 'context_name', 'timestamp']) |
191 | 182 | ]
|
192 | 183 |
|
193 | 184 | dataset_type = 'WaymoDataset'
|
| 185 | +train_dataloader = dict( |
| 186 | + batch_size=1, |
| 187 | + num_workers=4, |
| 188 | + persistent_workers=True, |
| 189 | + sampler=dict(type='DefaultSampler', shuffle=True), |
| 190 | + dataset=dict( |
| 191 | + type=dataset_type, |
| 192 | + data_root=data_root, |
| 193 | + ann_file='waymo_infos_train.pkl', |
| 194 | + data_prefix=dict(pts='training/velodyne', sweeps='training/velodyne'), |
| 195 | + pipeline=train_pipeline, |
| 196 | + modality=input_modality, |
| 197 | + test_mode=False, |
| 198 | + metainfo=metainfo, |
| 199 | + # we use box_type_3d='LiDAR' in kitti and nuscenes dataset |
| 200 | + # and box_type_3d='Depth' in sunrgbd and scannet dataset. |
| 201 | + box_type_3d='LiDAR', |
| 202 | + # load one frame every five frames |
| 203 | + load_interval=5, |
| 204 | + backend_args=backend_args)) |
194 | 205 | val_dataloader = dict(
|
195 | 206 | batch_size=4,
|
196 | 207 | num_workers=4,
|
|
212 | 223 |
|
213 | 224 | val_evaluator = dict(
|
214 | 225 | type='WaymoMetric',
|
215 |
| - ann_file='./data/waymo/kitti_format/waymo_infos_val.pkl', |
216 | 226 | waymo_bin_file='./data/waymo/waymo_format/gt.bin',
|
217 |
| - data_root='./data/waymo/waymo_format', |
218 |
| - backend_args=backend_args, |
219 |
| - convert_kitti_format=False, |
220 |
| - idx2metainfo='./data/waymo/waymo_format/idx2metainfo.pkl') |
| 227 | + result_prefix='./dsvt_pred') |
221 | 228 | test_evaluator = val_evaluator
|
222 | 229 |
|
223 | 230 | vis_backends = [dict(type='LocalVisBackend')]
|
224 | 231 | visualizer = dict(
|
225 | 232 | type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
|
226 | 233 |
|
| 234 | +# schedules |
| 235 | +lr = 1e-5 |
| 236 | +optim_wrapper = dict( |
| 237 | + type='OptimWrapper', |
| 238 | + optimizer=dict(type='AdamW', lr=lr, weight_decay=0.05, betas=(0.9, 0.99)), |
| 239 | + clip_grad=dict(max_norm=10, norm_type=2)) |
| 240 | +param_scheduler = [ |
| 241 | + dict( |
| 242 | + type='CosineAnnealingLR', |
| 243 | + T_max=1.2, |
| 244 | + eta_min=lr * 100, |
| 245 | + begin=0, |
| 246 | + end=1.2, |
| 247 | + by_epoch=True, |
| 248 | + convert_to_iter_based=True), |
| 249 | + dict( |
| 250 | + type='CosineAnnealingLR', |
| 251 | + T_max=10.8, |
| 252 | + eta_min=lr * 1e-4, |
| 253 | + begin=1.2, |
| 254 | + end=12, |
| 255 | + by_epoch=True, |
| 256 | + convert_to_iter_based=True), |
| 257 | + # momentum scheduler |
| 258 | + dict( |
| 259 | + type='CosineAnnealingMomentum', |
| 260 | + T_max=1.2, |
| 261 | + eta_min=0.85, |
| 262 | + begin=0, |
| 263 | + end=1.2, |
| 264 | + by_epoch=True, |
| 265 | + convert_to_iter_based=True), |
| 266 | + dict( |
| 267 | + type='CosineAnnealingMomentum', |
| 268 | + T_max=10.8, |
| 269 | + eta_min=0.95, |
| 270 | + begin=1.2, |
| 271 | + end=12, |
| 272 | + by_epoch=True, |
| 273 | + convert_to_iter_based=True) |
| 274 | +] |
| 275 | + |
| 276 | +# runtime settings |
| 277 | +train_cfg = dict(by_epoch=True, max_epochs=12, val_interval=1) |
| 278 | + |
227 | 279 | # runtime settings
|
228 | 280 | val_cfg = dict()
|
229 | 281 | test_cfg = dict()
|
|
236 | 288 |
|
237 | 289 | default_hooks = dict(
|
238 | 290 | logger=dict(type='LoggerHook', interval=50),
|
239 |
| - checkpoint=dict(type='CheckpointHook', interval=5)) |
| 291 | + checkpoint=dict(type='CheckpointHook', interval=1)) |
| 292 | +custom_hooks = [ |
| 293 | + dict( |
| 294 | + type='DisableAugHook', |
| 295 | + disable_after_epoch=11, |
| 296 | + disable_aug_list=[ |
| 297 | + 'GlobalRotScaleTrans', 'RandomFlip3D', 'ObjectSample' |
| 298 | + ]) |
| 299 | +] |
0 commit comments