Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions configs/rtdetrv3/_base_/optimizer_6x.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Optimizer and learning-rate settings for the RT-DETRv3 "6x" schedule.
epoch: 72

LearningRate:
  base_lr: 0.0004
  schedulers:
    # gamma is 1.0 and the only milestone (100) is larger than `epoch` (72).
    # NOTE(review): presumably this leaves the rate constant once warmup
    # ends — confirm against the PiecewiseDecay implementation.
    - !PiecewiseDecay
      gamma: 1.0
      milestones: [100]
      use_warmup: true
    - !LinearWarmup
      start_factor: 0.001
      steps: 2000

OptimizerBuilder:
  clip_grad_by_norm: 0.1
  regularizer: false
  optimizer:
    type: AdamW
    weight_decay: 0.0001
44 changes: 44 additions & 0 deletions configs/rtdetrv3/_base_/rtdetr_reader.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Data-loading pipelines for training, evaluation and inference.
worker_num: 4

TrainReader:
  # Per-sample augmentations, applied before batching.
  sample_transforms:
    - Decode: {}
    - RandomDistort: {prob: 0.8}
    - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
    - RandomCrop: {prob: 0.8}
    - RandomFlip: {}
  batch_transforms:
    # 640 is listed three times in target_size.
    # NOTE(review): presumably intentional, so that 640 is drawn more often
    # than the other sizes — confirm before de-duplicating.
    - BatchRandomResize:
        target_size: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800]
        random_size: true
        random_interp: true
        keep_ratio: false
    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
    - NormalizeBox: {retain_origin_box: true}
    - BboxXYXY2XYWH: {}
    - Permute: {}
    - PadGT: {only_origin_box: true}
  batch_size: 16
  shuffle: true
  drop_last: true
  collate_batch: false
  use_shared_memory: true


EvalReader:
  # Fixed 640x640 resize; same normalization settings as TrainReader.
  sample_transforms:
    - Decode: {}
    - Resize: {target_size: [640, 640], keep_ratio: false, interp: 2}
    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
    - Permute: {}
  batch_size: 16
  shuffle: false
  drop_last: false


TestReader:
  inputs_def:
    image_shape: [3, 640, 640]
  # Same transform chain as EvalReader, one image per batch.
  sample_transforms:
    - Decode: {}
    - Resize: {target_size: [640, 640], keep_ratio: false, interp: 2}
    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
    - Permute: {}
  batch_size: 1
  shuffle: false
  drop_last: false
100 changes: 100 additions & 0 deletions configs/rtdetrv3/_base_/rtdetrv3_r50vd.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# Base model definition for RT-DETRv3 with a ResNet50-vd backbone.
architecture: RTDETRV3
pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
norm_type: sync_bn
use_ema: true
ema_decay: 0.9999
ema_decay_type: "exponential"
ema_filter_no_grad: true
hidden_dim: 256
use_focal_loss: true
eval_size: [640, 640]


# Component wiring: each value names one of the sections configured below.
RTDETRV3:
  backbone: ResNet
  neck: HybridEncoder
  transformer: RTDETRTransformerv3
  detr_head: DINOv3Head
  aux_o2m_head: PPYOLOEHead
  post_process: DETRPostProcess

ResNet:
  # index 0 stands for res2
  depth: 50
  variant: d
  norm_type: bn
  freeze_at: 0
  return_idx: [1, 2, 3]
  lr_mult_list: [0.1, 0.1, 0.1, 0.1]
  num_stages: 4
  freeze_stem_only: true

HybridEncoder:
  hidden_dim: 256
  use_encoder_idx: [2]
  num_encoder_layers: 1
  encoder_layer:
    name: TransformerLayer
    d_model: 256
    nhead: 8
    dim_feedforward: 1024
    dropout: 0.
    activation: 'gelu'
  expansion: 1.0


RTDETRTransformerv3:
  num_queries: 300
  position_embed_type: sine
  feat_strides: [8, 16, 32]
  num_levels: 3
  nhead: 8
  num_decoder_layers: 6
  dim_feedforward: 1024
  dropout: 0.0
  activation: relu
  num_denoising: 100
  label_noise_ratio: 0.5
  box_noise_scale: 1.0
  learnt_init_query: false
  # Noise-query settings are empty here; the per-backbone configs in the
  # parent directory set num_noises and num_noise_queries themselves.
  num_noises: 0
  num_noise_queries: []
  num_noise_denoising: 100


DINOv3Head:
  o2m: 4
  loss:
    name: DINOv3Loss
    loss_coeff: {class: 1, bbox: 5, giou: 2}
    aux_loss: true
    use_vfl: true
    matcher:
      name: HungarianMatcher
      matcher_coeff: {class: 2, bbox: 5, giou: 2}

# Head referenced as `aux_o2m_head` in the RTDETRV3 section.
PPYOLOEHead:
  fpn_strides: [8, 16, 32]
  grid_cell_scale: 5.0
  grid_cell_offset: 0.5
  static_assigner_epoch: 30
  use_varifocal_loss: true
  loss_weight: {class: 1.0, iou: 2.5, dfl: 0.5}
  static_assigner:
    name: ATSSAssigner
    topk: 9
  assigner:
    name: TaskAlignedAssigner
    topk: 13
    alpha: 1.0
    beta: 6.0
  nms:
    name: MultiClassNMS
    nms_top_k: 1000
    keep_top_k: 300
    score_threshold: 0.01
    nms_threshold: 0.7

DETRPostProcess:
  num_top_queries: 300
54 changes: 54 additions & 0 deletions configs/rtdetrv3/rtdetrv3_r18vd_6x_coco.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# RT-DETRv3 with a ResNet18-vd backbone on COCO, 6x schedule.
_BASE_: [
  '../datasets/coco_detection.yml',
  '../runtime.yml',
  '_base_/optimizer_6x.yml',
  '_base_/rtdetrv3_r50vd.yml',
  '_base_/rtdetr_reader.yml',
]

weights: output/rtdetrv3_r18vd_6x_coco/model_final
find_unused_parameters: true
log_iter: 200

o2m_branch: true
num_queries_o2m: 450

pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet18_vd_pretrained.pdparams

# Same component wiring as _base_/rtdetrv3_r50vd.yml, restated here.
RTDETRV3:
  backbone: ResNet
  neck: HybridEncoder
  transformer: RTDETRTransformerv3
  detr_head: DINOv3Head
  aux_o2m_head: PPYOLOEHead
  post_process: DETRPostProcess

ResNet:
  depth: 18
  variant: d
  return_idx: [1, 2, 3]
  freeze_at: -1
  freeze_norm: false
  norm_decay: 0.

HybridEncoder:
  hidden_dim: 256
  use_encoder_idx: [2]
  num_encoder_layers: 1
  encoder_layer:
    name: TransformerLayer
    d_model: 256
    nhead: 8
    dim_feedforward: 1024
    dropout: 0.
    activation: 'gelu'
  # expansion is 0.5 here, versus 1.0 in _base_/rtdetrv3_r50vd.yml.
  expansion: 0.5
  depth_mult: 1.0

RTDETRTransformerv3:
  eval_idx: -1
  num_decoder_layers: 3
  # num_noise_queries has num_noises (3) entries.
  num_noises: 3
  num_noise_queries: [300, 300, 300]
  num_noise_denoising: 100
  learnt_init_query: false
54 changes: 54 additions & 0 deletions configs/rtdetrv3/rtdetrv3_r34vd_6x_coco.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# RT-DETRv3 with a ResNet34-vd backbone on COCO, 6x schedule.
_BASE_: [
  '../datasets/coco_detection.yml',
  '../runtime.yml',
  '_base_/optimizer_6x.yml',
  '_base_/rtdetrv3_r50vd.yml',
  '_base_/rtdetr_reader.yml',
]

weights: output/rtdetrv3_r34vd_6x_coco/model_final
find_unused_parameters: true
log_iter: 200

o2m_branch: true
num_queries_o2m: 450

pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/ResNet34_vd_pretrained.pdparams

# Same component wiring as _base_/rtdetrv3_r50vd.yml, restated here.
RTDETRV3:
  backbone: ResNet
  neck: HybridEncoder
  transformer: RTDETRTransformerv3
  detr_head: DINOv3Head
  aux_o2m_head: PPYOLOEHead
  post_process: DETRPostProcess

ResNet:
  depth: 34
  variant: d
  return_idx: [1, 2, 3]
  freeze_at: -1
  freeze_norm: false
  norm_decay: 0.

HybridEncoder:
  hidden_dim: 256
  use_encoder_idx: [2]
  num_encoder_layers: 1
  encoder_layer:
    name: TransformerLayer
    d_model: 256
    nhead: 8
    dim_feedforward: 1024
    dropout: 0.
    activation: 'gelu'
  # expansion is 0.5 here, versus 1.0 in _base_/rtdetrv3_r50vd.yml.
  expansion: 0.5
  depth_mult: 1.0

RTDETRTransformerv3:
  eval_idx: -1
  num_decoder_layers: 4
  # num_noise_queries has num_noises (3) entries.
  num_noises: 3
  num_noise_queries: [300, 300, 300]
  num_noise_denoising: 100
  learnt_init_query: false
25 changes: 25 additions & 0 deletions configs/rtdetrv3/rtdetrv3_r50vd_6x_coco.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# RT-DETRv3 with a ResNet50-vd backbone on COCO, 6x schedule.
_BASE_: [
  '../datasets/coco_detection.yml',
  '../runtime.yml',
  '_base_/optimizer_6x.yml',
  '_base_/rtdetrv3_r50vd.yml',
  '_base_/rtdetr_reader.yml',
]

weights: output/rtdetrv3_r50vd_6x_coco/model_final
find_unused_parameters: true
log_iter: 200

o2m_branch: true
num_queries_o2m: 450

pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams


RTDETRTransformerv3:
  eval_idx: -1
  num_decoder_layers: 6
  # num_noise_queries has num_noises (2) entries.
  num_noises: 2
  num_noise_queries: [300, 300]
  num_noise_denoising: 100
  learnt_init_query: false
2 changes: 1 addition & 1 deletion ppdet/engine/naive_sync_bn.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,4 +104,4 @@ def convert_bn(model):
bn.set_dict(m.state_dict())
setattr(model, n, bn)
else:
convert_bn(m)
convert_bn(m)
3 changes: 3 additions & 0 deletions ppdet/modeling/architectures/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,6 @@
from .detr_ssod import *
from .multi_stream_detector import *
from .clrnet import *

from . import rtdetrv3
from .rtdetrv3 import *
Loading