inference error result #2349
Replies: 1 comment
-
Resize augmentation is not supported for monocular 3D object detection; remove the `Resize` step from the training pipeline.
Beta Was this translation helpful? Give feedback.
0 replies
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Uh oh!
There was an error while loading. Please reload this page.
Uh oh!
There was an error while loading. Please reload this page.
-
Hi, I am trying to use the KITTI-format Waymo dataset to train the monocular 3D detector FCOS3D. The training process finished, but the inference results are unreasonable.

Besides, there are many zeros in the evaluation results.

Details: [dataset]
I used tools/create_data.py to generate the Waymo pkl files and coco.json files. I noticed that the label files contain only the front-view images (image_0), but this does not matter: I can add the other four images and their labels to the coco.json and pkl files.
[config]
`Config:
dataset_type = 'KittiMonoDataset'
data_root = '/mnt/data/waymo-set/kitti_format/'
class_names = ['Car', 'Pedestrian', 'Cyclist']
input_modality = dict(use_lidar=False, use_camera=True)
img_norm_cfg = dict(
mean=[103.53, 116.28, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
train_pipeline = [
dict(type='LoadImageFromFileMono3D'),
dict(
type='LoadAnnotations3D',
with_bbox=True,
with_label=True,
with_attr_label=False,
with_bbox_3d=True,
with_label_3d=True,
with_bbox_depth=True),
dict(type='Resize', img_scale=(1280, 864), keep_ratio=True),
dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
dict(
type='Normalize',
mean=[103.53, 116.28, 123.675],
std=[1.0, 1.0, 1.0],
to_rgb=False),
dict(type='Pad', size_divisor=32),
dict(
type='DefaultFormatBundle3D',
class_names=['Car', 'Pedestrian', 'Cyclist']),
dict(
type='Collect3D',
keys=[
'img', 'gt_bboxes', 'gt_labels', 'gt_bboxes_3d', 'gt_labels_3d',
'centers2d', 'depths'
])
]
test_pipeline = [
dict(type='LoadImageFromFileMono3D'),
dict(
type='MultiScaleFlipAug',
scale_factor=1.0,
flip=False,
transforms=[
dict(type='RandomFlip3D'),
dict(
type='Normalize',
mean=[103.53, 116.28, 123.675],
std=[1.0, 1.0, 1.0],
to_rgb=False),
dict(type='Pad', size_divisor=32),
dict(
type='DefaultFormatBundle3D',
class_names=['Car', 'Pedestrian', 'Cyclist'],
with_label=False),
dict(type='Collect3D', keys=['img'])
])
]
eval_pipeline = [
dict(type='LoadImageFromFileMono3D'),
dict(
type='DefaultFormatBundle3D',
class_names=['Pedestrian', 'Cyclist', 'Car'],
with_label=False),
dict(type='Collect3D', keys=['img'])
]
data = dict(
samples_per_gpu=4,
workers_per_gpu=4,
train=dict(
type='KittiMonoDataset',
data_root='/mnt/data/waymo-set/kitti_format/',
ann_file=
'/mnt/data/waymo-set/kitti_format/waymo_infos_train_mono3d.coco.json',
info_file='/mnt/data/waymo-set/kitti_format/waymo_infos_train.pkl',
img_prefix='/mnt/data/waymo-set/kitti_format/',
classes=['Pedestrian', 'Cyclist', 'Car'],
pipeline=[
dict(type='LoadImageFromFileMono3D'),
dict(
type='LoadAnnotations3D',
with_bbox=True,
with_label=True,
with_attr_label=False,
with_bbox_3d=True,
with_label_3d=True,
with_bbox_depth=True),
dict(type='Resize', img_scale=(1280, 864), keep_ratio=True),
dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
dict(
type='Normalize',
mean=[103.53, 116.28, 123.675],
std=[1.0, 1.0, 1.0],
to_rgb=False),
dict(type='Pad', size_divisor=32),
dict(
type='DefaultFormatBundle3D',
class_names=['Car', 'Pedestrian', 'Cyclist']),
dict(
type='Collect3D',
keys=[
'img', 'gt_bboxes', 'gt_labels', 'gt_bboxes_3d',
'gt_labels_3d', 'centers2d', 'depths'
])
],
modality=dict(use_lidar=False, use_camera=True),
use_valid_flag=True,
test_mode=False,
box_type_3d='Camera'),
val=dict(
type='KittiMonoDataset',
data_root='/mnt/data/waymo-set/kitti_format/',
ann_file=
'/mnt/data/waymo-set/kitti_format/waymo_infos_val_mono3d.coco.json',
info_file='/mnt/data/waymo-set/kitti_format/waymo_infos_val.pkl',
img_prefix='/mnt/data/waymo-set/kitti_format/',
classes=['Pedestrian', 'Cyclist', 'Car'],
pipeline=[
dict(type='LoadImageFromFileMono3D'),
dict(
type='MultiScaleFlipAug',
scale_factor=1.0,
flip=False,
transforms=[
dict(type='RandomFlip3D'),
dict(
type='Normalize',
mean=[103.53, 116.28, 123.675],
std=[1.0, 1.0, 1.0],
to_rgb=False),
dict(type='Pad', size_divisor=32),
dict(
type='DefaultFormatBundle3D',
class_names=['Car', 'Pedestrian', 'Cyclist'],
with_label=False),
dict(type='Collect3D', keys=['img'])
])
],
modality=dict(use_lidar=False, use_camera=True),
use_valid_flag=True,
test_mode=False,
box_type_3d='Camera'),
test=dict(
type='KittiMonoDataset',
data_root='/mnt/data/waymo-set/kitti_format/',
ann_file=
'/mnt/data/waymo-set/kitti_format/waymo_infos_val_mono3d.coco.json',
info_file='/mnt/data/waymo-set/kitti_format/waymo_infos_val.pkl',
img_prefix='/mnt/data/waymo-set/kitti_format/',
classes=['Pedestrian', 'Cyclist', 'Car'],
pipeline=[
dict(type='LoadImageFromFileMono3D'),
dict(
type='MultiScaleFlipAug',
scale_factor=1.0,
flip=False,
transforms=[
dict(type='RandomFlip3D'),
dict(
type='Normalize',
mean=[103.53, 116.28, 123.675],
std=[1.0, 1.0, 1.0],
to_rgb=False),
dict(type='Pad', size_divisor=32),
dict(
type='DefaultFormatBundle3D',
class_names=['Car', 'Pedestrian', 'Cyclist'],
with_label=False),
dict(type='Collect3D', keys=['img'])
])
],
modality=dict(use_lidar=False, use_camera=True),
use_valid_flag=True,
test_mode=True,
box_type_3d='Camera'))
evaluation = dict(interval=2)
model = dict(
type='FCOSMono3D',
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=False),
norm_eval=True,
style='caffe',
init_cfg=dict(
type='Pretrained',
checkpoint='open-mmlab://detectron2/resnet50_caffe'),
dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),
stage_with_dcn=(False, False, True, True)),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
start_level=1,
add_extra_convs='on_output',
num_outs=5,
relu_before_extra_convs=True),
bbox_head=dict(
type='FCOSMono3DHead',
num_classes=3,
in_channels=256,
stacked_convs=2,
feat_channels=256,
bbox_code_size=7,
use_direction_classifier=True,
diff_rad_by_sin=True,
pred_attrs=False,
pred_velo=False,
dir_offset=0.7854,
dir_limit_offset=0,
strides=[8, 16, 32, 64, 128],
group_reg_dims=(2, 1, 3, 1, 2),
cls_branch=(256, ),
reg_branch=((256, ), (256, ), (256, ), (256, ), ()),
dir_branch=(256, ),
attr_branch=(256, ),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(
type='SmoothL1Loss', beta=0.1111111111111111, loss_weight=1.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_attr=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_centerness=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
bbox_coder=dict(type='FCOS3DBBoxCoder', code_size=7),
norm_on_bbox=True,
centerness_on_reg=True,
center_sampling=True,
conv_bias=True,
dcn_on_last_conv=True),
train_cfg=dict(
allowed_border=0,
code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.05, 0.05],
pos_weight=-1,
debug=False),
test_cfg=dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_pre=1000,
nms_thr=0.8,
score_thr=0.05,
min_bbox_size=0,
max_per_img=200))
optimizer = dict(
type='SGD',
lr=0.001,
momentum=0.9,
weight_decay=0.0001,
paramwise_cfg=dict(bias_lr_mult=2.0, bias_decay_mult=0.0))
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.3333333333333333,
step=[8, 11])
runner = dict(type='EpochBasedRunner', max_epochs=12)
checkpoint_config = dict(interval=1)
log_config = dict(
interval=50,
hooks=[dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')])
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = None
load_from = 'work_dirs/fcos3d_r50/epoch_2.pth'
resume_from = None
workflow = [('train', 1)]
opencv_num_threads = 0
mp_start_method = 'fork'
total_epochs = 12
`
I am waiting for your reply.
Beta Was this translation helpful? Give feedback.
All reactions