|
| 1 | +from mmengine.config import read_base |
| 2 | + |
| 3 | +with read_base(): |
| 4 | + from ._base_.default_runtime import * |
| 5 | + from ._base_.schedules.schedule_1x import * |
| 6 | + from .datasets.coco_detection import * |
| 7 | + |
| 8 | +from torchvision.ops import nms |
| 9 | +from torch.nn import ReLU, BatchNorm2d, ReLU6 |
| 10 | +from torch.optim.adamw import AdamW |
| 11 | + |
| 12 | +from mmengine.hooks import EMAHook |
| 13 | +from mmengine.optim import CosineAnnealingLR, LinearLR, AmpOptimWrapper |
| 14 | +from sscma.datasets.transforms import ( |
| 15 | + MixUp, |
| 16 | + Mosaic, |
| 17 | + Pad, |
| 18 | + RandomCrop, |
| 19 | + RandomFlip, |
| 20 | + Resize, |
| 21 | + HSVRandomAug, |
| 22 | + RandomResize, |
| 23 | + LoadImageFromFile, |
| 24 | + LoadAnnotations, |
| 25 | + PackDetInputs, |
| 26 | +) |
| 27 | +from sscma.datasets import DetDataPreprocessor |
| 28 | +from sscma.engine import PipelineSwitchHook, DetVisualizationHook |
| 29 | +from sscma.models import ( |
| 30 | + BboxOverlaps2D, |
| 31 | + MlvlPointGenerator, |
| 32 | + DistancePointBBoxCoder, |
| 33 | + BatchDynamicSoftLabelAssigner, |
| 34 | + CSPNeXtPAFPN, |
| 35 | + GIoULoss, |
| 36 | + QualityFocalLoss, |
| 37 | + ExpMomentumEMA, |
| 38 | + RTMDet, |
| 39 | + RTMDetHead, |
| 40 | + RTMDetSepBNHeadModule, |
| 41 | + CSPNeXt, |
| 42 | +) |
| 43 | +from sscma.visualization import DetLocalVisualizer |
| 44 | +from sscma.deploy.models import RTMDetInfer |
| 45 | +from sscma.quantizer import RtmdetQuantModel |
| 46 | + |
# Register the detection visualization hook on the inherited default hooks,
# with drawing enabled and "works" as the test output directory.
default_hooks.visualization = dict(
    type=DetVisualizationHook,
    draw=True,
    test_out_dir="works",
)

# Visualizer definition. `vis_backends` is not defined in this file, so it is
# presumably supplied by the base runtime config star-import.
visualizer = dict(
    type=DetLocalVisualizer,
    vis_backends=vis_backends,
    name="visualizer",
)
| 52 | + |
# ---------------------------------------------------------------------------
# Shared hyper-parameters referenced throughout the rest of this config.
# ---------------------------------------------------------------------------

# Model scaling: forwarded as deepen_factor / widen_factor to the model parts.
d_factor = 0.33
w_factor = 0.25

# Dataset / input geometry.
num_classes = 80
imgsz = (640, 640)

# Training schedule. The last `stage2_num_epochs` epochs run with the
# stage-2 pipeline (see the pipeline switch hook further down).
epochs = 300
stage2_num_epochs = 20
base_lr = 0.0005
# Used for both the validation interval and the checkpoint interval.
interval = 10

# Training dataloader sizing.
batch_size = 32
num_workers = 16

# Ratio range for random resize.
random_resize_ratio_range = (0.5, 2.0)
# Number of cached images in Mosaic.
mosaic_max_cached_images = 20
# Number of cached images in MixUp.
mixup_max_cached_images = 10
| 70 | + |
# Pretrained weights for the backbone (consumed by `init_cfg` below).
# NOTE(review): 192.168.1.77 is a private-network address, so this URL only
# resolves inside that network - confirm a reachable location before this
# config is shared or run elsewhere.
checkpoint = "http://192.168.1.77/epoch_593_top1_59.06.pth"

# RTMDet detector: CSPNeXt backbone -> CSPNeXtPAFPN neck -> RTMDet head.
model = dict(
    type=RTMDet,
    # Input normalisation: zero mean and std 255 per channel, no channel swap.
    data_preprocessor=dict(
        type=DetDataPreprocessor,
        mean=[0, 0, 0],
        std=[255, 255, 255],
        bgr_to_rgb=False,
        batch_augments=None,
    ),
    backbone=dict(
        type=CSPNeXt,
        arch="P5",
        expand_ratio=0.5,
        deepen_factor=d_factor,
        widen_factor=w_factor,
        channel_attention=False,
        split_max_pool_kernel=False,
        norm_cfg=dict(type=BatchNorm2d),
        act_cfg=dict(type=ReLU6, inplace=True),
        # Only keys under the "backbone." prefix are taken from the checkpoint.
        init_cfg=dict(
            type="Pretrained",
            prefix="backbone.",
            checkpoint=checkpoint,
        ),
    ),
    neck=dict(
        type=CSPNeXtPAFPN,
        deepen_factor=d_factor,
        widen_factor=w_factor,
        in_channels=[256, 512, 1024],
        out_channels=256,
        num_csp_blocks=3,
        expand_ratio=0.5,
        norm_cfg=dict(type=BatchNorm2d),
        act_cfg=dict(type=ReLU6, inplace=True),
    ),
    bbox_head=dict(
        type=RTMDetHead,
        head_module=dict(
            type=RTMDetSepBNHeadModule,
            num_classes=num_classes,
            in_channels=256,
            stacked_convs=2,
            feat_channels=256,
            widen_factor=w_factor,
            norm_cfg=dict(type=BatchNorm2d),
            act_cfg=dict(type=ReLU6, inplace=True),
            share_conv=False,
            pred_kernel_size=1,
            featmap_strides=[8, 16, 32],
        ),
        # Strides here mirror `featmap_strides` of the head module above.
        prior_generator=dict(
            type=MlvlPointGenerator,
            offset=0,
            strides=[8, 16, 32],
        ),
        bbox_coder=dict(type=DistancePointBBoxCoder),
        loss_cls=dict(
            type=QualityFocalLoss,
            use_sigmoid=True,
            beta=2.0,
            loss_weight=1.0,
        ),
        loss_bbox=dict(type=GIoULoss, loss_weight=2.0),
    ),
    train_cfg=dict(
        assigner=dict(
            type=BatchDynamicSoftLabelAssigner,
            num_classes=num_classes,
            topk=13,
            iou_calculator=dict(type=BboxOverlaps2D),
        ),
        allowed_border=-1,
        pos_weight=-1,
        debug=False,
    ),
    test_cfg=dict(
        multi_label=True,
        nms_pre=30000,
        min_bbox_size=0,
        score_thr=0.001,
        nms=dict(type=nms, iou_threshold=0.65),
        max_per_img=300,
    ),
)
# Deployment-time inference wrapper; its preprocessing settings repeat the
# values used by `model["data_preprocessor"]`.
deploy = dict(
    type=RTMDetInfer,
    data_preprocessor=dict(
        type=DetDataPreprocessor,
        mean=[0, 0, 0],
        std=[255, 255, 255],
        bgr_to_rgb=False,
        batch_augments=None,
    ),
)

# Copy the detector-level train/test settings into the head config so the
# head dict handed to the quantizer below carries them as well.
model["bbox_head"].update(
    train_cfg=model["train_cfg"],
    test_cfg=model["test_cfg"],
)

# Quantization wrapper, built from the detector's head and preprocessor dicts.
quantizer_config = dict(
    type=RtmdetQuantModel,
    bbox_head=model["bbox_head"],
    data_preprocessor=model["data_preprocessor"],
)
# Image decoding backend shared by every pipeline in this config.
imdecode_backend = "cv2"

# Stage-1 training pipeline: Mosaic and MixUp, both drawing from a cache,
# around the usual resize / crop / colour / flip / pad steps.
train_pipeline = [
    dict(
        type=LoadImageFromFile,
        imdecode_backend=imdecode_backend,
        backend_args=None,
    ),
    dict(type=LoadAnnotations, imdecode_backend=imdecode_backend, with_bbox=True),
    dict(
        type=Mosaic,
        img_scale=imgsz,
        use_cached=True,
        max_cached_images=mosaic_max_cached_images,
        random_pop=False,
        pad_val=114.0,
    ),
    dict(
        type=RandomResize,
        # Base scale is twice the training resolution on both axes.
        scale=(imgsz[0] * 2, imgsz[1] * 2),
        # Use the shared constant rather than a duplicated literal so the
        # ratio range is tuned in exactly one place.
        ratio_range=random_resize_ratio_range,
        resize_type=Resize,
        keep_ratio=True,
    ),
    dict(type=RandomCrop, crop_size=imgsz),
    dict(type=HSVRandomAug),
    dict(type=RandomFlip, prob=0.5),
    dict(type=Pad, size=imgsz, pad_val=dict(img=(114, 114, 114))),
    dict(
        type=MixUp,
        use_cached=True,
        random_pop=False,
        max_cached_images=mixup_max_cached_images,
        prob=0.5,
    ),
    dict(type=PackDetInputs),
]
| 199 | + |
# Stage-2 training pipeline: the same steps as `train_pipeline` minus Mosaic
# and MixUp. It is handed to the pipeline switch hook for the final epochs.
train_pipeline_stage2 = [
    dict(
        type=LoadImageFromFile,
        imdecode_backend=imdecode_backend,
        backend_args=None,
    ),
    dict(type=LoadAnnotations, imdecode_backend=imdecode_backend, with_bbox=True),
    dict(
        type=RandomResize,
        # Base scale is twice the training resolution on both axes.
        scale=(imgsz[0] * 2, imgsz[1] * 2),
        # Use the shared constant rather than a duplicated literal so both
        # training stages always resize over the same ratio range.
        ratio_range=random_resize_ratio_range,
        resize_type=Resize,
        keep_ratio=True,
    ),
    dict(type=RandomCrop, crop_size=imgsz),
    dict(type=HSVRandomAug),
    dict(type=RandomFlip, prob=0.5),
    dict(type=Pad, size=imgsz, pad_val=dict(img=(114, 114, 114))),
    dict(type=PackDetInputs),
]
| 220 | + |
# Validation / test pipeline: aspect-preserving resize followed by padding
# to `imgsz`, with no random augmentation.
# NOTE(review): `backend_args` is not defined in this file (presumably it comes
# from the base dataset config), whereas the training pipelines pass
# `backend_args=None` - confirm the difference is intentional.
test_pipeline = [
    dict(
        type=LoadImageFromFile,
        imdecode_backend=imdecode_backend,
        backend_args=backend_args,
    ),
    dict(
        type=LoadAnnotations,
        imdecode_backend=imdecode_backend,
        with_bbox=True,
    ),
    dict(
        type=Resize,
        scale=imgsz,
        keep_ratio=True,
    ),
    dict(
        type=Pad,
        size=imgsz,
        pad_val=dict(img=(114, 114, 114)),
    ),
    dict(
        type=PackDetInputs,
        meta_keys=(
            "img_id",
            "img_path",
            "ori_shape",
            "img_shape",
            "scale_factor",
        ),
    ),
]
| 235 | + |
| 236 | + |
# Override the inherited training dataloader with this config's batch size,
# worker count, collate function, pipeline and annotation file.
train_dataloader.update(
    batch_size=batch_size,
    num_workers=num_workers,
    batch_sampler=None,
    pin_memory=True,
    collate_fn=coco_collate,
    dataset=dict(
        pipeline=train_pipeline,
        ann_file="annotations/instances_train2017.json",
    ),
)
| 249 | + |
# Validation batch size. One constant feeds both the batch-shape policy and
# the dataloader; previously the policy hard-coded 32 while the loader used
# 16, so the two disagreed about what a batch is.
val_batch_size = 16

# Config of batch shapes. Only on val.
batch_shapes_cfg = dict(
    type=BatchShapePolicy,
    batch_size=val_batch_size,
    img_size=imgsz[0],
    size_divisor=32,
    extra_pad_ratio=0.5,
)


# Validation dataloader over the COCO val2017 split, in fixed (unshuffled) order.
val_dataloader = dict(
    batch_size=val_batch_size,
    num_workers=8,
    persistent_workers=True,
    pin_memory=True,
    drop_last=False,
    sampler=dict(type=DefaultSampler, shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file="annotations/instances_val2017.json",
        data_prefix=dict(img="val2017/"),
        test_mode=True,
        pipeline=test_pipeline,
        batch_shapes_cfg=batch_shapes_cfg,
    ),
)
# Testing reuses the validation dataloader definition (same dict object).
test_dataloader = val_dataloader
| 278 | + |
| 279 | + |
# Training loop overrides: total epochs, the default validation interval, and
# a dynamic interval of 1 starting at the epoch where stage 2 begins
# (epochs - stage2_num_epochs).
train_cfg.update(
    max_epochs=epochs,
    val_interval=interval,
    dynamic_intervals=[(epochs - stage2_num_epochs, 1)],
)

# Override the proposal counts on the inherited evaluator; the test evaluator
# is the same object as the validation evaluator.
val_evaluator.update(proposal_nums=(100, 1, 10))
test_evaluator = val_evaluator
| 290 | + |
# Optimizer: AdamW inside the AMP optimizer wrapper. Weight decay is set to
# zero for norm layers and biases through the paramwise multipliers.
optim_wrapper = dict(
    type=AmpOptimWrapper,
    optimizer=dict(
        type=AdamW,
        lr=base_lr,
        weight_decay=0.05,
    ),
    paramwise_cfg=dict(
        norm_decay_mult=0,
        bias_decay_mult=0,
        bypass_duplicate=True,
    ),
)

# Learning-rate schedule.
param_scheduler = [
    # Iteration-based linear schedule covering iterations 0 to 2000.
    dict(
        type=LinearLR,
        start_factor=1.0e-5,
        by_epoch=False,
        begin=0,
        end=2000,
    ),
    # Cosine annealing over the second half of training (epochs // 2 up to
    # epochs), decaying towards 5% of the base learning rate.
    dict(
        type=CosineAnnealingLR,
        eta_min=base_lr * 0.05,
        begin=epochs // 2,
        end=epochs,
        T_max=epochs // 2,
        by_epoch=True,
        convert_to_iter_based=True,
    ),
]
| 312 | + |
# Checkpoint hook overrides on the inherited default hooks.
default_hooks.update(
    checkpoint=dict(
        interval=interval,
        max_keep_ckpts=3,  # only keep latest 3 checkpoints
        save_best="auto",
    ),
)

# Extra hooks: an EMA hook, and a pipeline switch at
# epoch (epochs - stage2_num_epochs) to `train_pipeline_stage2`.
custom_hooks = [
    dict(
        type=EMAHook,
        ema_type=ExpMomentumEMA,
        momentum=0.0002,
        update_buffers=True,
        priority=49,
    ),
    dict(
        type=PipelineSwitchHook,
        switch_epoch=epochs - stage2_num_epochs,
        switch_pipeline=train_pipeline_stage2,
    ),
]

# Automatic learning-rate scaling, with 32 as the reference batch size.
auto_scale_lr = dict(enable=True, base_batch_size=32)

dump_config = True
0 commit comments