CLIPResNet 训练中,突然上升nan。 #1918
zhaoguoqing12
started this conversation in
General
Replies: 3 comments 1 reply
-
We recommend using English or English & Chinese for issues so that we could have broader discussion. |
Beta Was this translation helpful? Give feedback.
0 replies
-
In practice, CLIPResNet is very sensitive to the learning rate. Apart from a small learning rate, a generally more conservative learning rate warmup/decay strategy is worth trying. |
Beta Was this translation helpful? Give feedback.
1 reply
-
你好,请问你解决这个问题了吗,最终可以达到mmocr中的hmean分数了吗? |
Beta Was this translation helpful? Give feedback.
0 replies
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Uh oh!
There was an error while loading. Please reload this page.
-
Prerequisite
Task
I have modified the scripts/configs, or I'm working on my own tasks/models/datasets.
Branch
main branch https://github.com/open-mmlab/mmocr
Environment
无
Reproduces the problem - code sample
model = dict(
type='DBNet',
backbone=dict(
type='CLIPResNet',
init_cfg=dict(
type='Pretrained',
checkpoint=
'https://download.openmmlab.com/mmocr/backbone/resnet50-oclip-7ba0c533.pth'
)),
neck=dict(
type='FPNC',
in_channels=[256, 512, 1024, 2048],
lateral_channels=256,
asf_cfg=dict(attention_type='ScaleChannelSpatial')),
det_head=dict(
type='DBHead',
in_channels=256,
module_loss=dict(type='DBModuleLoss'),
postprocessor=dict(
type='DBPostprocessor', text_repr_type='quad',
epsilon_ratio=0.002)),
data_preprocessor=dict(
type='TextDetDataPreprocessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_size_divisor=32))
train_pipeline = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='LoadOCRAnnotations',
with_bbox=True,
with_polygon=True,
with_label=True),
dict(
type='TorchVisionWrapper',
op='ColorJitter',
brightness=0.12549019607843137,
saturation=0.5),
dict(
type='ImgAugWrapper',
args=[['Fliplr', 0.5], {
'cls': 'Affine',
'rotate': [-10, 10]
}, ['Resize', [0.5, 3.0]]]),
dict(type='RandomCrop', min_side_ratio=0.1),
dict(type='Resize', scale=(640, 640), keep_ratio=True),
dict(type='Pad', size=(640, 640)),
dict(
type='PackTextDetInputs',
meta_keys=('img_path', 'ori_shape', 'img_shape'))
]
test_pipeline = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(type='Resize', scale=(4068, 1024), keep_ratio=True),
dict(
type='LoadOCRAnnotations',
with_polygon=True,
with_bbox=True,
with_label=True),
dict(
type='PackTextDetInputs',
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor',
'instances'))
]
default_scope = 'mmocr'
env_cfg = dict(
cudnn_benchmark=False,
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
dist_cfg=dict(backend='nccl'))
randomness = dict(seed=None)
default_hooks = dict(
timer=dict(type='IterTimerHook'),
logger=dict(type='LoggerHook', interval=5),
param_scheduler=dict(type='ParamSchedulerHook'),
checkpoint=dict(type='CheckpointHook', interval=10),
sampler_seed=dict(type='DistSamplerSeedHook'),
sync_buffer=dict(type='SyncBuffersHook'),
visualization=dict(
type='VisualizationHook',
interval=1,
enable=False,
show=False,
draw_gt=False,
draw_pred=False))
log_level = 'INFO'
log_processor = dict(type='LogProcessor', window_size=10, by_epoch=True)
load_from = './resnet50-oclip-7ba0c533.pth'
resume = False
val_evaluator = dict(type='HmeanIOUMetric')
test_evaluator = dict(type='HmeanIOUMetric')
vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
type='TextDetLocalVisualizer',
name='visualizer',
vis_backends=[dict(type='LocalVisBackend')])
icdar2015_textdet_data_root = '/data/guoqing/hand_writting_data/'
icdar2015_textdet_train = dict(
type='OCRDataset',
data_root='/data/guoqing/hand_writting_data/',
ann_file='textdet_train.json',
filter_cfg=dict(filter_empty_gt=True, min_size=32),
pipeline=None)
icdar2015_textdet_test = dict(
type='OCRDataset',
data_root='/data/guoqing/hand_writting_data/',
ann_file='textdet_test.json',
test_mode=True,
pipeline=None)
optim_wrapper = dict(
type='OptimWrapper', optimizer=dict(type='Adam', lr=0.0001))
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=500, val_interval=10)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
train_list = [
dict(
type='OCRDataset',
data_root='/data/guoqing/hand_writting_data/',
ann_file='textdet_train.json',
filter_cfg=dict(filter_empty_gt=True, min_size=32),
pipeline=None)
]
test_list = [
dict(
type='OCRDataset',
data_root='/data/guoqing/hand_writting_data/',
ann_file='textdet_test.json',
test_mode=True,
pipeline=None)
]
train_dataloader = dict(
batch_size=16,
num_workers=16,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type='ConcatDataset',
datasets=[
dict(
type='OCRDataset',
data_root='/data/guoqing/hand_writting_data/',
ann_file='textdet_train.json',
filter_cfg=dict(filter_empty_gt=True, min_size=32),
pipeline=None)
],
pipeline=[
dict(
type='LoadImageFromFile',
color_type='color_ignore_orientation'),
dict(
type='LoadOCRAnnotations',
with_bbox=True,
with_polygon=True,
with_label=True),
dict(
type='TorchVisionWrapper',
op='ColorJitter',
brightness=0.12549019607843137,
saturation=0.5),
dict(
type='ImgAugWrapper',
args=[['Fliplr', 0.5], {
'cls': 'Affine',
'rotate': [-10, 10]
}, ['Resize', [0.5, 3.0]]]),
dict(type='RandomCrop', min_side_ratio=0.1),
dict(type='Resize', scale=(640, 640), keep_ratio=True),
dict(type='Pad', size=(640, 640)),
dict(
type='PackTextDetInputs',
meta_keys=('img_path', 'ori_shape', 'img_shape'))
]))
val_dataloader = dict(
batch_size=16,
num_workers=8,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type='ConcatDataset',
datasets=[
dict(
type='OCRDataset',
data_root='/data/guoqing/hand_writting_data/',
ann_file='textdet_test.json',
test_mode=True,
pipeline=None)
],
pipeline=[
dict(
type='LoadImageFromFile',
color_type='color_ignore_orientation'),
dict(type='Resize', scale=(4068, 1024), keep_ratio=True),
dict(
type='LoadOCRAnnotations',
with_polygon=True,
with_bbox=True,
with_label=True),
dict(
type='PackTextDetInputs',
meta_keys=('img_path', 'ori_shape', 'img_shape',
'scale_factor', 'instances'))
]))
test_dataloader = dict(
batch_size=16,
num_workers=8,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type='ConcatDataset',
datasets=[
dict(
type='OCRDataset',
data_root='/data/guoqing/hand_writting_data/',
ann_file='textdet_test.json',
test_mode=True,
pipeline=None)
],
pipeline=[
dict(
type='LoadImageFromFile',
color_type='color_ignore_orientation'),
dict(type='Resize', scale=(4068, 1024), keep_ratio=True),
dict(
type='LoadOCRAnnotations',
with_polygon=True,
with_bbox=True,
with_label=True),
dict(
type='PackTextDetInputs',
meta_keys=('img_path', 'ori_shape', 'img_shape',
'scale_factor', 'instances'))
]))
auto_scale_lr = dict(base_batch_size=16)
param_scheduler = [
dict(type='LinearLR', end=100, start_factor=0.001),
dict(type='PolyLR', power=0.9, eta_min=1e-07, begin=100, end=500)
]
launcher = 'pytorch'
work_dir = './work_dirs/dbnetpp_resnet50-oclip_fpnc_1200e_icdar2015'
Reproduces the problem - command or script
CUDA_VISIBLE_DEVICES=1,2 PORT=29500 nohup ./tools/dist_train.sh configs/textdet/dbnetpp/dbnetpp_resnet50-oclip_fpnc_1200e_icdar2015.py 2 &
Reproduces the problem - error message
已降低学习率,依然无法收敛,loss nan。
Additional information
自己数据集,已验证不是数据集问题,学习率多次降低调整
Beta Was this translation helpful? Give feedback.
All reactions