# Optimizer wrapper settings (AMP variant).
# The wrapper type is 'AmpOptimWrapper'; it wraps an SGD optimizer and uses a
# fixed loss scale of 512.
optim_wrapper = {
    # NOTE(review): `_delete_` is presumably the config-inheritance marker that
    # discards the inherited `optim_wrapper` instead of merging into it --
    # confirm against the config loader in use.
    '_delete_': True,
    'type': 'AmpOptimWrapper',
    'optimizer': {
        'type': 'SGD',
        'lr': 0.05,
        'momentum': 0.9,
        'weight_decay': 0.0005,
    },
    'loss_scale': 512.0,
}
# bX (e.g. b4/b8) denotes the batch size.