inference error result #2349

FlyingAnt2018 · 2023-03-14T02:24:16Z

FlyingAnt2018
Mar 14, 2023

Hi, I am trying to use the Waymo dataset in KITTI format to train the monocular 3D detector FCOS3D. The training process finished, but inference produces unreasonable results.

Besides, there are many zeros in the evaluation results.

detail:[dataset]
I used tools/create_data.py to generate the Waymo pkl files and coco.json files. I realized that the label files only contain the front-view images (image_0), but this does not matter: I can add the other four images and their labels to the coco.json and pkl files.
[config]
`Config:
# Dataset class name and root directory of the KITTI-format data.
dataset_type = 'KittiMonoDataset'
data_root = '/mnt/data/waymo-set/kitti_format/'

# Category names used by the top-level pipelines.
# NOTE(review): the `classes` lists under `data` name the same three
# categories in a different order -- confirm which order is intended.
class_names = [
    'Car',
    'Pedestrian',
    'Cyclist',
]

# Camera-only input: lidar disabled, camera enabled.
input_modality = dict(
    use_lidar=False,
    use_camera=True,
)

# Image normalisation: per-channel mean with unit std, to_rgb disabled.
img_norm_cfg = dict(
    mean=[103.53, 116.28, 123.675],
    std=[1.0, 1.0, 1.0],
    to_rgb=False,
)
# Training-time preprocessing steps, applied in list order.
train_pipeline = [
    dict(type='LoadImageFromFileMono3D'),
    # Annotation loading: every flag is enabled except attribute labels.
    dict(
        type='LoadAnnotations3D',
        with_bbox=True,
        with_label=True,
        with_attr_label=False,
        with_bbox_3d=True,
        with_label_3d=True,
        with_bbox_depth=True,
    ),
    # NOTE(review): test_pipeline has no matching Resize step (it uses
    # scale_factor=1.0). The follow-up comment in this thread attributes
    # the bad results to resize augmentation -- confirm before reusing.
    dict(
        type='Resize',
        img_scale=(1280, 864),
        keep_ratio=True,
    ),
    dict(
        type='RandomFlip3D',
        flip_ratio_bev_horizontal=0.5,
    ),
    dict(
        type='Normalize',
        mean=[103.53, 116.28, 123.675],
        std=[1.0, 1.0, 1.0],
        to_rgb=False,
    ),
    dict(type='Pad', size_divisor=32),
    dict(
        type='DefaultFormatBundle3D',
        class_names=['Car', 'Pedestrian', 'Cyclist'],
    ),
    # Keys handed on from the pipeline output.
    dict(
        type='Collect3D',
        keys=[
            'img',
            'gt_bboxes',
            'gt_labels',
            'gt_bboxes_3d',
            'gt_labels_3d',
            'centers2d',
            'depths',
        ],
    ),
]
# Test-time preprocessing: image loading followed by one wrapper step
# that holds the nested transforms (scale factor 1.0, flip disabled).
test_pipeline = [
    dict(type='LoadImageFromFileMono3D'),
    dict(
        type='MultiScaleFlipAug',
        scale_factor=1.0,
        flip=False,
        transforms=[
            dict(type='RandomFlip3D'),
            dict(
                type='Normalize',
                mean=[103.53, 116.28, 123.675],
                std=[1.0, 1.0, 1.0],
                to_rgb=False,
            ),
            dict(type='Pad', size_divisor=32),
            dict(
                type='DefaultFormatBundle3D',
                class_names=['Car', 'Pedestrian', 'Cyclist'],
                with_label=False,
            ),
            # Only the image is collected here.
            dict(type='Collect3D', keys=['img']),
        ],
    ),
]
# Evaluation pipeline: load the image, bundle it without labels, and
# collect only the image.
# NOTE(review): class_names here is ordered Pedestrian/Cyclist/Car, unlike
# the Car/Pedestrian/Cyclist order in the top-level class_names -- confirm.
eval_pipeline = [
    dict(type='LoadImageFromFileMono3D'),
    dict(
        type='DefaultFormatBundle3D',
        class_names=[
            'Pedestrian',
            'Cyclist',
            'Car',
        ],
        with_label=False,
    ),
    dict(
        type='Collect3D',
        keys=['img'],
    ),
]
# Dataset definitions for the train / val / test splits.
#
# NOTE(review), all visible in this block -- confirm each is intended:
#   * `classes` is ordered Pedestrian/Cyclist/Car, while the
#     DefaultFormatBundle3D steps use Car/Pedestrian/Cyclist.
#   * only the train pipeline has a Resize step; val and test keep
#     scale_factor=1.0 with no Resize.
#   * val uses test_mode=False, test uses test_mode=True.
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    # ---- training split ------------------------------------------------
    train=dict(
        type='KittiMonoDataset',
        data_root='/mnt/data/waymo-set/kitti_format/',
        ann_file='/mnt/data/waymo-set/kitti_format/waymo_infos_train_mono3d.coco.json',
        info_file='/mnt/data/waymo-set/kitti_format/waymo_infos_train.pkl',
        img_prefix='/mnt/data/waymo-set/kitti_format/',
        classes=['Pedestrian', 'Cyclist', 'Car'],
        pipeline=[
            dict(type='LoadImageFromFileMono3D'),
            dict(
                type='LoadAnnotations3D',
                with_bbox=True,
                with_label=True,
                with_attr_label=False,
                with_bbox_3d=True,
                with_label_3d=True,
                with_bbox_depth=True,
            ),
            dict(
                type='Resize',
                img_scale=(1280, 864),
                keep_ratio=True,
            ),
            dict(
                type='RandomFlip3D',
                flip_ratio_bev_horizontal=0.5,
            ),
            dict(
                type='Normalize',
                mean=[103.53, 116.28, 123.675],
                std=[1.0, 1.0, 1.0],
                to_rgb=False,
            ),
            dict(type='Pad', size_divisor=32),
            dict(
                type='DefaultFormatBundle3D',
                class_names=['Car', 'Pedestrian', 'Cyclist'],
            ),
            dict(
                type='Collect3D',
                keys=[
                    'img',
                    'gt_bboxes',
                    'gt_labels',
                    'gt_bboxes_3d',
                    'gt_labels_3d',
                    'centers2d',
                    'depths',
                ],
            ),
        ],
        modality=dict(use_lidar=False, use_camera=True),
        use_valid_flag=True,
        test_mode=False,
        box_type_3d='Camera',
    ),
    # ---- validation split ----------------------------------------------
    val=dict(
        type='KittiMonoDataset',
        data_root='/mnt/data/waymo-set/kitti_format/',
        ann_file='/mnt/data/waymo-set/kitti_format/waymo_infos_val_mono3d.coco.json',
        info_file='/mnt/data/waymo-set/kitti_format/waymo_infos_val.pkl',
        img_prefix='/mnt/data/waymo-set/kitti_format/',
        classes=['Pedestrian', 'Cyclist', 'Car'],
        pipeline=[
            dict(type='LoadImageFromFileMono3D'),
            dict(
                type='MultiScaleFlipAug',
                scale_factor=1.0,
                flip=False,
                transforms=[
                    dict(type='RandomFlip3D'),
                    dict(
                        type='Normalize',
                        mean=[103.53, 116.28, 123.675],
                        std=[1.0, 1.0, 1.0],
                        to_rgb=False,
                    ),
                    dict(type='Pad', size_divisor=32),
                    dict(
                        type='DefaultFormatBundle3D',
                        class_names=['Car', 'Pedestrian', 'Cyclist'],
                        with_label=False,
                    ),
                    dict(type='Collect3D', keys=['img']),
                ],
            ),
        ],
        modality=dict(use_lidar=False, use_camera=True),
        use_valid_flag=True,
        test_mode=False,
        box_type_3d='Camera',
    ),
    # ---- test split (reads the same val annotation files) --------------
    test=dict(
        type='KittiMonoDataset',
        data_root='/mnt/data/waymo-set/kitti_format/',
        ann_file='/mnt/data/waymo-set/kitti_format/waymo_infos_val_mono3d.coco.json',
        info_file='/mnt/data/waymo-set/kitti_format/waymo_infos_val.pkl',
        img_prefix='/mnt/data/waymo-set/kitti_format/',
        classes=['Pedestrian', 'Cyclist', 'Car'],
        pipeline=[
            dict(type='LoadImageFromFileMono3D'),
            dict(
                type='MultiScaleFlipAug',
                scale_factor=1.0,
                flip=False,
                transforms=[
                    dict(type='RandomFlip3D'),
                    dict(
                        type='Normalize',
                        mean=[103.53, 116.28, 123.675],
                        std=[1.0, 1.0, 1.0],
                        to_rgb=False,
                    ),
                    dict(type='Pad', size_divisor=32),
                    dict(
                        type='DefaultFormatBundle3D',
                        class_names=['Car', 'Pedestrian', 'Cyclist'],
                        with_label=False,
                    ),
                    dict(type='Collect3D', keys=['img']),
                ],
            ),
        ],
        modality=dict(use_lidar=False, use_camera=True),
        use_valid_flag=True,
        test_mode=True,
        box_type_3d='Camera',
    ),
)

# Evaluation interval setting.
evaluation = dict(interval=2)
# Detector definition: backbone, neck, dense head, and train/test settings.
model = dict(
    type='FCOSMono3D',
    # Depth-50 ResNet, caffe style, loaded from a pretrained checkpoint.
    # DCNv2 is switched on for the last two of the four stages.
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=False),
        norm_eval=True,
        style='caffe',
        init_cfg=dict(
            type='Pretrained',
            checkpoint='open-mmlab://detectron2/resnet50_caffe',
        ),
        dcn=dict(
            type='DCNv2',
            deform_groups=1,
            fallback_on_stride=False,
        ),
        stage_with_dcn=(False, False, True, True),
    ),
    # FPN configured for five outputs of 256 channels each.
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_output',
        num_outs=5,
        relu_before_extra_convs=True,
    ),
    # Dense head for three classes; attribute and velocity prediction are
    # both disabled (pred_attrs=False, pred_velo=False).
    bbox_head=dict(
        type='FCOSMono3DHead',
        num_classes=3,
        in_channels=256,
        stacked_convs=2,
        feat_channels=256,
        bbox_code_size=7,
        use_direction_classifier=True,
        diff_rad_by_sin=True,
        pred_attrs=False,
        pred_velo=False,
        dir_offset=0.7854,
        dir_limit_offset=0,
        strides=[8, 16, 32, 64, 128],
        group_reg_dims=(2, 1, 3, 1, 2),
        cls_branch=(256, ),
        reg_branch=((256, ), (256, ), (256, ), (256, ), ()),
        dir_branch=(256, ),
        attr_branch=(256, ),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0,
        ),
        loss_bbox=dict(
            type='SmoothL1Loss',
            beta=0.1111111111111111,
            loss_weight=1.0,
        ),
        loss_dir=dict(
            type='CrossEntropyLoss',
            use_sigmoid=False,
            loss_weight=1.0,
        ),
        loss_attr=dict(
            type='CrossEntropyLoss',
            use_sigmoid=False,
            loss_weight=1.0,
        ),
        loss_centerness=dict(
            type='CrossEntropyLoss',
            use_sigmoid=True,
            loss_weight=1.0,
        ),
        bbox_coder=dict(type='FCOS3DBBoxCoder', code_size=7),
        norm_on_bbox=True,
        centerness_on_reg=True,
        center_sampling=True,
        conv_bias=True,
        dcn_on_last_conv=True,
    ),
    train_cfg=dict(
        allowed_border=0,
        # Nine entries, the same count as the sum of group_reg_dims above.
        code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.05, 0.05],
        pos_weight=-1,
        debug=False,
    ),
    # Inference-time NMS and score-threshold settings.
    test_cfg=dict(
        use_rotate_nms=True,
        nms_across_levels=False,
        nms_pre=1000,
        nms_thr=0.8,
        score_thr=0.05,
        min_bbox_size=0,
        max_per_img=200,
    ),
)
# ---- optimisation ------------------------------------------------------
# SGD with momentum; bias parameters get a 2x learning-rate multiplier and
# a zero weight-decay multiplier.
optimizer = dict(
    type='SGD',
    lr=0.001,
    momentum=0.9,
    weight_decay=0.0001,
    paramwise_cfg=dict(
        bias_lr_mult=2.0,
        bias_decay_mult=0.0,
    ),
)
# Gradient clipping by L2 norm, capped at 35.
optimizer_config = dict(
    grad_clip=dict(
        max_norm=35,
        norm_type=2,
    ),
)

# ---- learning-rate schedule --------------------------------------------
# Step policy with steps at 8 and 11, after a 500-iteration linear warmup.
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.3333333333333333,
    step=[8, 11],
)
runner = dict(
    type='EpochBasedRunner',
    max_epochs=12,
)

# ---- checkpointing and logging -----------------------------------------
checkpoint_config = dict(interval=1)
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        dict(type='TensorboardLoggerHook'),
    ],
)

# ---- runtime -----------------------------------------------------------
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = None
# Weights are loaded from an earlier epoch-2 checkpoint; resume_from is unset.
load_from = 'work_dirs/fcos3d_r50/epoch_2.pth'
resume_from = None
workflow = [('train', 1)]
opencv_num_threads = 0
mp_start_method = 'fork'
# Same value as runner['max_epochs'] above.
total_epochs = 12

`
I am waiting for your reply.

FlyingAnt2018 · 2023-03-20T07:24:18Z

FlyingAnt2018
Mar 20, 2023
Author

Resize augmentation must not be used for monocular 3D object detection.

0 replies

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

inference error result #2349

Uh oh!

{{title}}

Uh oh!

Uh oh!

{{editor}}'s edit

{{editor}}'s edit

Uh oh!

Replies: 1 comment

Uh oh!

{{title}}

Uh oh!

Select a reply

Uh oh!

inference error result #2349

Uh oh!

Uh oh!

FlyingAnt2018 Mar 14, 2023

Replies: 1 comment

Uh oh!

FlyingAnt2018 Mar 20, 2023 Author

FlyingAnt2018
Mar 14, 2023

FlyingAnt2018
Mar 20, 2023
Author