first commit

This commit is contained in:
admin
2026-05-20 15:05:35 +08:00
commit ac09b26253
2048 changed files with 189478 additions and 0 deletions

View File

@@ -0,0 +1,68 @@
# dataset settings
dataset_type = 'ADE20KDataset'
data_root = 'data/ade/ADEChallengeData2016'
crop_size = (512, 512)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', reduce_zero_label=True),
dict(
type='RandomResize',
scale=(2048, 512),
ratio_range=(0.5, 2.0),
keep_ratio=True),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='PackSegInputs')
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=(2048, 512), keep_ratio=True),
# add loading annotation after ``Resize`` because ground truth
# does not need to do resize data transform
dict(type='LoadAnnotations', reduce_zero_label=True),
dict(type='PackSegInputs')
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
dict(type='LoadImageFromFile', backend_args=None),
dict(
type='TestTimeAug',
transforms=[
[
dict(type='Resize', scale_factor=r, keep_ratio=True)
for r in img_ratios
],
[
dict(type='RandomFlip', prob=0., direction='horizontal'),
dict(type='RandomFlip', prob=1., direction='horizontal')
], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
])
]
train_dataloader = dict(
batch_size=4,
num_workers=4,
persistent_workers=True,
sampler=dict(type='InfiniteSampler', shuffle=True),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='images/training', seg_map_path='annotations/training'),
pipeline=train_pipeline))
val_dataloader = dict(
batch_size=1,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='images/validation',
seg_map_path='annotations/validation'),
pipeline=test_pipeline))
test_dataloader = val_dataloader
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator

View File

@@ -0,0 +1,68 @@
# dataset settings
dataset_type = 'ADE20KDataset'
data_root = 'data/ade/ADEChallengeData2016'
crop_size = (640, 640)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', reduce_zero_label=True),
dict(
type='RandomResize',
scale=(2560, 640),
ratio_range=(0.5, 2.0),
keep_ratio=True),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='PackSegInputs')
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=(2560, 640), keep_ratio=True),
# add loading annotation after ``Resize`` because ground truth
# does not need to do resize data transform
dict(type='LoadAnnotations', reduce_zero_label=True),
dict(type='PackSegInputs')
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
dict(type='LoadImageFromFile', backend_args=None),
dict(
type='TestTimeAug',
transforms=[
[
dict(type='Resize', scale_factor=r, keep_ratio=True)
for r in img_ratios
],
[
dict(type='RandomFlip', prob=0., direction='horizontal'),
dict(type='RandomFlip', prob=1., direction='horizontal')
], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
])
]
train_dataloader = dict(
batch_size=4,
num_workers=4,
persistent_workers=True,
sampler=dict(type='InfiniteSampler', shuffle=True),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='images/training', seg_map_path='annotations/training'),
pipeline=train_pipeline))
val_dataloader = dict(
batch_size=1,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='images/validation',
seg_map_path='annotations/validation'),
pipeline=test_pipeline))
test_dataloader = val_dataloader
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator

View File

@@ -0,0 +1,70 @@
# dataset settings
dataset_type = 'BDD100KDataset'
data_root = 'data/bdd100k/'
crop_size = (512, 1024)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'),
dict(
type='RandomResize',
scale=(2048, 1024),
ratio_range=(0.5, 2.0),
keep_ratio=True),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='PackSegInputs')
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=(2048, 1024), keep_ratio=True),
# add loading annotation after ``Resize`` because ground truth
# does not need to do resize data transform
dict(type='LoadAnnotations'),
dict(type='PackSegInputs')
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
dict(type='LoadImageFromFile', backend_args=None),
dict(
type='TestTimeAug',
transforms=[
[
dict(type='Resize', scale_factor=r, keep_ratio=True)
for r in img_ratios
],
[
dict(type='RandomFlip', prob=0., direction='horizontal'),
dict(type='RandomFlip', prob=1., direction='horizontal')
], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
])
]
train_dataloader = dict(
batch_size=2,
num_workers=2,
persistent_workers=True,
sampler=dict(type='InfiniteSampler', shuffle=True),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='images/10k/train',
seg_map_path='labels/sem_seg/masks/train'),
pipeline=train_pipeline))
val_dataloader = dict(
batch_size=1,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='images/10k/val',
seg_map_path='labels/sem_seg/masks/val'),
pipeline=test_pipeline))
test_dataloader = val_dataloader
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator

View File

@@ -0,0 +1,75 @@
# dataset settings
dataset_type = 'ChaseDB1Dataset'
data_root = 'data/CHASE_DB1'
img_scale = (960, 999)
crop_size = (128, 128)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'),
dict(
type='RandomResize',
scale=img_scale,
ratio_range=(0.5, 2.0),
keep_ratio=True),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='PackSegInputs')
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=img_scale, keep_ratio=True),
# add loading annotation after ``Resize`` because ground truth
# does not need to do resize data transform
dict(type='LoadAnnotations'),
dict(type='PackSegInputs')
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
dict(type='LoadImageFromFile', backend_args=None),
dict(
type='TestTimeAug',
transforms=[
[
dict(type='Resize', scale_factor=r, keep_ratio=True)
for r in img_ratios
],
[
dict(type='RandomFlip', prob=0., direction='horizontal'),
dict(type='RandomFlip', prob=1., direction='horizontal')
], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
])
]
train_dataloader = dict(
batch_size=4,
num_workers=4,
persistent_workers=True,
sampler=dict(type='InfiniteSampler', shuffle=True),
dataset=dict(
type='RepeatDataset',
times=40000,
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='images/training',
seg_map_path='annotations/training'),
pipeline=train_pipeline)))
val_dataloader = dict(
batch_size=1,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='images/validation',
seg_map_path='annotations/validation'),
pipeline=test_pipeline))
test_dataloader = val_dataloader
val_evaluator = dict(type='IoUMetric', iou_metrics=['mDice'])
test_evaluator = val_evaluator

View File

@@ -0,0 +1,67 @@
# dataset settings
dataset_type = 'CityscapesDataset'
data_root = 'data/cityscapes/'
crop_size = (512, 1024)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'),
dict(
type='RandomResize',
scale=(2048, 1024),
ratio_range=(0.5, 2.0),
keep_ratio=True),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='PackSegInputs')
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=(2048, 1024), keep_ratio=True),
# add loading annotation after ``Resize`` because ground truth
# does not need to do resize data transform
dict(type='LoadAnnotations'),
dict(type='PackSegInputs')
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
dict(type='LoadImageFromFile', backend_args=None),
dict(
type='TestTimeAug',
transforms=[
[
dict(type='Resize', scale_factor=r, keep_ratio=True)
for r in img_ratios
],
[
dict(type='RandomFlip', prob=0., direction='horizontal'),
dict(type='RandomFlip', prob=1., direction='horizontal')
], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
])
]
train_dataloader = dict(
batch_size=2,
num_workers=2,
persistent_workers=True,
sampler=dict(type='InfiniteSampler', shuffle=True),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='leftImg8bit/train', seg_map_path='gtFine/train'),
pipeline=train_pipeline))
val_dataloader = dict(
batch_size=1,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='leftImg8bit/val', seg_map_path='gtFine/val'),
pipeline=test_pipeline))
test_dataloader = val_dataloader
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator

View File

@@ -0,0 +1,29 @@
_base_ = './cityscapes.py'
crop_size = (1024, 1024)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'),
dict(
type='RandomResize',
scale=(2048, 1024),
ratio_range=(0.5, 2.0),
keep_ratio=True),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='PackSegInputs')
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=(2048, 1024), keep_ratio=True),
# add loading annotation after ``Resize`` because ground truth
# does not need to do resize data transform
dict(type='LoadAnnotations'),
dict(type='PackSegInputs')
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = val_dataloader
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator

View File

@@ -0,0 +1,29 @@
_base_ = './cityscapes.py'
crop_size = (768, 768)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'),
dict(
type='RandomResize',
scale=(2049, 1025),
ratio_range=(0.5, 2.0),
keep_ratio=True),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='PackSegInputs')
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=(2049, 1025), keep_ratio=True),
# add loading annotation after ``Resize`` because ground truth
# does not need to do resize data transform
dict(type='LoadAnnotations'),
dict(type='PackSegInputs')
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = val_dataloader
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator

View File

@@ -0,0 +1,29 @@
_base_ = './cityscapes.py'
crop_size = (769, 769)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'),
dict(
type='RandomResize',
scale=(2049, 1025),
ratio_range=(0.5, 2.0),
keep_ratio=True),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='PackSegInputs')
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=(2049, 1025), keep_ratio=True),
# add loading annotation after ``Resize`` because ground truth
# does not need to do resize data transform
dict(type='LoadAnnotations'),
dict(type='PackSegInputs')
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = val_dataloader
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator

View File

@@ -0,0 +1,29 @@
_base_ = './cityscapes.py'
crop_size = (832, 832)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'),
dict(
type='RandomResize',
scale=(2048, 1024),
ratio_range=(0.5, 2.0),
keep_ratio=True),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='PackSegInputs')
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=(2048, 1024), keep_ratio=True),
# add loading annotation after ``Resize`` because ground truth
# does not need to do resize data transform
dict(type='LoadAnnotations'),
dict(type='PackSegInputs')
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = val_dataloader
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator

View File

@@ -0,0 +1,69 @@
# dataset settings
dataset_type = 'COCOStuffDataset'
data_root = 'data/coco_stuff10k'
crop_size = (512, 512)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', reduce_zero_label=True),
dict(
type='RandomResize',
scale=(2048, 512),
ratio_range=(0.5, 2.0),
keep_ratio=True),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='PackSegInputs')
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=(2048, 512), keep_ratio=True),
# add loading annotation after ``Resize`` because ground truth
# does not need to do resize data transform
dict(type='LoadAnnotations', reduce_zero_label=True),
dict(type='PackSegInputs')
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
dict(type='LoadImageFromFile', backend_args=None),
dict(
type='TestTimeAug',
transforms=[
[
dict(type='Resize', scale_factor=r, keep_ratio=True)
for r in img_ratios
],
[
dict(type='RandomFlip', prob=0., direction='horizontal'),
dict(type='RandomFlip', prob=1., direction='horizontal')
], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
])
]
train_dataloader = dict(
batch_size=4,
num_workers=4,
persistent_workers=True,
sampler=dict(type='InfiniteSampler', shuffle=True),
dataset=dict(
type=dataset_type,
data_root=data_root,
reduce_zero_label=True,
data_prefix=dict(
img_path='images/train2014', seg_map_path='annotations/train2014'),
pipeline=train_pipeline))
val_dataloader = dict(
batch_size=1,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
reduce_zero_label=True,
data_prefix=dict(
img_path='images/test2014', seg_map_path='annotations/test2014'),
pipeline=test_pipeline))
test_dataloader = val_dataloader
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator

View File

@@ -0,0 +1,67 @@
# dataset settings
dataset_type = 'COCOStuffDataset'
data_root = 'data/coco_stuff164k'
crop_size = (512, 512)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'),
dict(
type='RandomResize',
scale=(2048, 512),
ratio_range=(0.5, 2.0),
keep_ratio=True),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='PackSegInputs')
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=(2048, 512), keep_ratio=True),
# add loading annotation after ``Resize`` because ground truth
# does not need to do resize data transform
dict(type='LoadAnnotations'),
dict(type='PackSegInputs')
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
dict(type='LoadImageFromFile', backend_args=None),
dict(
type='TestTimeAug',
transforms=[
[
dict(type='Resize', scale_factor=r, keep_ratio=True)
for r in img_ratios
],
[
dict(type='RandomFlip', prob=0., direction='horizontal'),
dict(type='RandomFlip', prob=1., direction='horizontal')
], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
])
]
train_dataloader = dict(
batch_size=4,
num_workers=4,
persistent_workers=True,
sampler=dict(type='InfiniteSampler', shuffle=True),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='images/train2017', seg_map_path='annotations/train2017'),
pipeline=train_pipeline))
val_dataloader = dict(
batch_size=1,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='images/val2017', seg_map_path='annotations/val2017'),
pipeline=test_pipeline))
test_dataloader = val_dataloader
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator

View File

@@ -0,0 +1,73 @@
# dataset settings
dataset_type = 'DRIVEDataset'
data_root = 'data/DRIVE'
img_scale = (584, 565)
crop_size = (64, 64)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'),
dict(
type='RandomResize',
scale=img_scale,
ratio_range=(0.5, 2.0),
keep_ratio=True),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='PackSegInputs')
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=img_scale, keep_ratio=True),
# add loading annotation after ``Resize`` because ground truth
# does not need to do resize data transform
dict(type='LoadAnnotations'),
dict(type='PackSegInputs')
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
dict(type='LoadImageFromFile', backend_args=None),
dict(
type='TestTimeAug',
transforms=[
[
dict(type='Resize', scale_factor=r, keep_ratio=True)
for r in img_ratios
],
[
dict(type='RandomFlip', prob=0., direction='horizontal'),
dict(type='RandomFlip', prob=1., direction='horizontal')
], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
])
]
train_dataloader = dict(
batch_size=4,
num_workers=4,
persistent_workers=True,
sampler=dict(type='InfiniteSampler', shuffle=True),
dataset=dict(
type='RepeatDataset',
times=40000,
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='images/training',
seg_map_path='annotations/training'),
pipeline=train_pipeline)))
val_dataloader = dict(
batch_size=1,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='images/validation',
seg_map_path='annotations/validation'),
pipeline=test_pipeline))
test_dataloader = val_dataloader
val_evaluator = dict(type='IoUMetric', iou_metrics=['mDice'])
test_evaluator = val_evaluator

View File

@@ -0,0 +1,73 @@
# dataset settings
dataset_type = 'HRFDataset'
data_root = 'data/HRF'
img_scale = (2336, 3504)
crop_size = (256, 256)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'),
dict(
type='RandomResize',
scale=img_scale,
ratio_range=(0.5, 2.0),
keep_ratio=True),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='PackSegInputs')
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=img_scale, keep_ratio=True),
# add loading annotation after ``Resize`` because ground truth
# does not need to do resize data transform
dict(type='LoadAnnotations'),
dict(type='PackSegInputs')
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
dict(type='LoadImageFromFile', backend_args=None),
dict(
type='TestTimeAug',
transforms=[
[
dict(type='Resize', scale_factor=r, keep_ratio=True)
for r in img_ratios
],
[
dict(type='RandomFlip', prob=0., direction='horizontal'),
dict(type='RandomFlip', prob=1., direction='horizontal')
], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
])
]
train_dataloader = dict(
batch_size=4,
num_workers=4,
persistent_workers=True,
sampler=dict(type='InfiniteSampler', shuffle=True),
dataset=dict(
type='RepeatDataset',
times=40000,
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='images/training',
seg_map_path='annotations/training'),
pipeline=train_pipeline)))
val_dataloader = dict(
batch_size=1,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='images/validation',
seg_map_path='annotations/validation'),
pipeline=test_pipeline))
test_dataloader = val_dataloader
val_evaluator = dict(type='IoUMetric', iou_metrics=['mDice'])
test_evaluator = val_evaluator

View File

@@ -0,0 +1,53 @@
train_pipeline = [
dict(type='LoadImageFromNpyFile'),
dict(type='LoadAnnotations'),
dict(type='RandomCrop', crop_size=(192, 384)),
dict(type='PackSegInputs')
]
test_pipeline = [
dict(type='LoadImageFromNpyFile'),
dict(type='RandomCrop', crop_size=(192, 384)),
dict(type='LoadAnnotations'),
dict(type='PackSegInputs')
]
train_dataloader = dict(
batch_size=4,
num_workers=1,
persistent_workers=True,
sampler=dict(type='InfiniteSampler', shuffle=True),
dataset=dict(
type='HSIDrive20Dataset',
data_root='data/HSIDrive20',
data_prefix=dict(
img_path='images/training', seg_map_path='annotations/training'),
pipeline=train_pipeline))
val_dataloader = dict(
batch_size=1,
num_workers=1,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type='HSIDrive20Dataset',
data_root='data/HSIDrive20',
data_prefix=dict(
img_path='images/validation',
seg_map_path='annotations/validation'),
pipeline=test_pipeline))
test_dataloader = dict(
batch_size=1,
num_workers=1,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type='HSIDrive20Dataset',
data_root='data/HSIDrive20',
data_prefix=dict(
img_path='images/test', seg_map_path='annotations/test'),
pipeline=test_pipeline))
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'], ignore_index=0)
test_evaluator = val_evaluator

View File

@@ -0,0 +1,73 @@
# dataset settings
dataset_type = 'iSAIDDataset'
data_root = 'data/iSAID'
"""
This crop_size setting is followed by the implementation of
`PointFlow: Flowing Semantics Through Points for Aerial Image
Segmentation <https://arxiv.org/pdf/2103.06564.pdf>`_.
"""
crop_size = (896, 896)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'),
dict(
type='RandomResize',
scale=(896, 896),
ratio_range=(0.5, 2.0),
keep_ratio=True),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='PackSegInputs')
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=(896, 896), keep_ratio=True),
# add loading annotation after ``Resize`` because ground truth
# does not need to do resize data transform
dict(type='LoadAnnotations'),
dict(type='PackSegInputs')
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
dict(type='LoadImageFromFile', backend_args=None),
dict(
type='TestTimeAug',
transforms=[
[
dict(type='Resize', scale_factor=r, keep_ratio=True)
for r in img_ratios
],
[
dict(type='RandomFlip', prob=0., direction='horizontal'),
dict(type='RandomFlip', prob=1., direction='horizontal')
], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
])
]
train_dataloader = dict(
batch_size=4,
num_workers=4,
persistent_workers=True,
sampler=dict(type='InfiniteSampler', shuffle=True),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='img_dir/train', seg_map_path='ann_dir/train'),
pipeline=train_pipeline))
val_dataloader = dict(
batch_size=1,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'),
pipeline=test_pipeline))
test_dataloader = val_dataloader
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator

View File

@@ -0,0 +1,68 @@
# dataset settings
dataset_type = 'LEVIRCDDataset'
data_root = r'data/LEVIRCD'
albu_train_transforms = [
dict(type='RandomBrightnessContrast', p=0.2),
dict(type='HorizontalFlip', p=0.5),
dict(type='VerticalFlip', p=0.5)
]
train_pipeline = [
dict(type='LoadMultipleRSImageFromFile'),
dict(type='LoadAnnotations'),
dict(
type='Albu',
keymap={
'img': 'image',
'img2': 'image2',
'gt_seg_map': 'mask'
},
transforms=albu_train_transforms,
additional_targets={'image2': 'image'},
bgr_to_rgb=False),
dict(type='ConcatCDInput'),
dict(type='PackSegInputs')
]
test_pipeline = [
dict(type='LoadMultipleRSImageFromFile'),
dict(type='LoadAnnotations'),
dict(type='ConcatCDInput'),
dict(type='PackSegInputs')
]
tta_pipeline = [
dict(type='LoadMultipleRSImageFromFile'),
dict(
type='TestTimeAug',
transforms=[[dict(type='LoadAnnotations')],
[dict(type='ConcatCDInput')],
[dict(type='PackSegInputs')]])
]
train_dataloader = dict(
batch_size=4,
num_workers=4,
persistent_workers=True,
sampler=dict(type='InfiniteSampler', shuffle=True),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='train/A',
img_path2='train/B',
seg_map_path='train/label'),
pipeline=train_pipeline))
val_dataloader = dict(
batch_size=1,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='test/A', img_path2='test/B', seg_map_path='test/label'),
pipeline=test_pipeline))
test_dataloader = val_dataloader
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator

View File

@@ -0,0 +1,66 @@
# dataset settings
dataset_type = 'LoveDADataset'
data_root = 'data/loveDA'
crop_size = (512, 512)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', reduce_zero_label=True),
dict(
type='RandomResize',
scale=(2048, 512),
ratio_range=(0.5, 2.0),
keep_ratio=True),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='PackSegInputs')
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=(1024, 1024), keep_ratio=True),
# add loading annotation after ``Resize`` because ground truth
# does not need to do resize data transform
dict(type='LoadAnnotations', reduce_zero_label=True),
dict(type='PackSegInputs')
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
dict(type='LoadImageFromFile', backend_args=None),
dict(
type='TestTimeAug',
transforms=[
[
dict(type='Resize', scale_factor=r, keep_ratio=True)
for r in img_ratios
],
[
dict(type='RandomFlip', prob=0., direction='horizontal'),
dict(type='RandomFlip', prob=1., direction='horizontal')
], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
])
]
train_dataloader = dict(
batch_size=4,
num_workers=4,
persistent_workers=True,
sampler=dict(type='InfiniteSampler', shuffle=True),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='img_dir/train', seg_map_path='ann_dir/train'),
pipeline=train_pipeline))
val_dataloader = dict(
batch_size=1,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'),
pipeline=test_pipeline))
test_dataloader = val_dataloader
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator

View File

@@ -0,0 +1,68 @@
# dataset settings
dataset_type = 'MapillaryDataset_v1'
data_root = 'data/mapillary/'
crop_size = (512, 1024)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'),
dict(
type='RandomResize',
scale=(2048, 1024),
ratio_range=(0.5, 2.0),
keep_ratio=True),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='PackSegInputs')
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=(2048, 1024), keep_ratio=True),
# add loading annotation after ``Resize`` because ground truth
# does not need to do resize data transform
dict(type='LoadAnnotations'),
dict(type='PackSegInputs')
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
dict(
type='TestTimeAug',
transforms=[
[
dict(type='Resize', scale_factor=r, keep_ratio=True)
for r in img_ratios
],
[
dict(type='RandomFlip', prob=0., direction='horizontal'),
dict(type='RandomFlip', prob=1., direction='horizontal')
], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
])
]
train_dataloader = dict(
batch_size=2,
num_workers=4,
persistent_workers=True,
sampler=dict(type='InfiniteSampler', shuffle=True),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='training/images', seg_map_path='training/v1.2/labels'),
pipeline=train_pipeline))
val_dataloader = dict(
batch_size=1,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='validation/images',
seg_map_path='validation/v1.2/labels'),
pipeline=test_pipeline))
test_dataloader = val_dataloader
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator

View File

@@ -0,0 +1,37 @@
# dataset settings
_base_ = './mapillary_v1.py'
metainfo = dict(
classes=('Bird', 'Ground Animal', 'Curb', 'Fence', 'Guard Rail', 'Barrier',
'Wall', 'Bike Lane', 'Crosswalk - Plain', 'Curb Cut', 'Parking',
'Pedestrian Area', 'Rail Track', 'Road', 'Service Lane',
'Sidewalk', 'Bridge', 'Building', 'Tunnel', 'Person', 'Bicyclist',
'Motorcyclist', 'Other Rider', 'Lane Marking - Crosswalk',
'Lane Marking - General', 'Mountain', 'Sand', 'Sky', 'Snow',
'Terrain', 'Vegetation', 'Water', 'Banner', 'Bench', 'Bike Rack',
'Billboard', 'Catch Basin', 'CCTV Camera', 'Fire Hydrant',
'Junction Box', 'Mailbox', 'Manhole', 'Phone Booth', 'Pothole',
'Street Light', 'Pole', 'Traffic Sign Frame', 'Utility Pole',
'Traffic Light', 'Traffic Sign (Back)', 'Traffic Sign (Front)',
'Trash Can', 'Bicycle', 'Boat', 'Bus', 'Car', 'Caravan',
'Motorcycle', 'On Rails', 'Other Vehicle', 'Trailer', 'Truck',
'Wheeled Slow', 'Car Mount', 'Ego Vehicle'),
palette=[[165, 42, 42], [0, 192, 0], [196, 196, 196], [190, 153, 153],
[180, 165, 180], [90, 120, 150], [102, 102, 156], [128, 64, 255],
[140, 140, 200], [170, 170, 170], [250, 170, 160], [96, 96, 96],
[230, 150, 140], [128, 64, 128], [110, 110, 110], [244, 35, 232],
[150, 100, 100], [70, 70, 70], [150, 120, 90], [220, 20, 60],
[255, 0, 0], [255, 0, 100], [255, 0, 200], [200, 128, 128],
[255, 255, 255], [64, 170, 64], [230, 160, 50], [70, 130, 180],
[190, 255, 255], [152, 251, 152], [107, 142, 35], [0, 170, 30],
[255, 255, 128], [250, 0, 30], [100, 140, 180], [220, 220, 220],
[220, 128, 128], [222, 40, 40], [100, 170, 30], [40, 40, 40],
[33, 33, 33], [100, 128, 160], [142, 0, 0], [70, 100, 150],
[210, 170, 100], [153, 153, 153], [128, 128, 128], [0, 0, 80],
[250, 170, 30], [192, 192, 192], [220, 220, 0], [140, 140, 20],
[119, 11, 32], [150, 0, 255], [0, 60, 100], [0, 0, 142],
[0, 0, 90], [0, 0, 230], [0, 80, 100], [128, 64, 64], [0, 0, 110],
[0, 0, 70], [0, 0, 192], [32, 32, 32], [120, 10, 10]])
train_dataloader = dict(dataset=dict(metainfo=metainfo))
val_dataloader = dict(dataset=dict(metainfo=metainfo))
test_dataloader = val_dataloader

View File

@@ -0,0 +1,68 @@
# dataset settings
dataset_type = 'MapillaryDataset_v2'
data_root = 'data/mapillary/'
crop_size = (512, 1024)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'),
dict(
type='RandomResize',
scale=(2048, 1024),
ratio_range=(0.5, 2.0),
keep_ratio=True),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='PackSegInputs')
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=(2048, 1024), keep_ratio=True),
# add loading annotation after ``Resize`` because ground truth
# does not need to do resize data transform
dict(type='LoadAnnotations'),
dict(type='PackSegInputs')
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
dict(
type='TestTimeAug',
transforms=[
[
dict(type='Resize', scale_factor=r, keep_ratio=True)
for r in img_ratios
],
[
dict(type='RandomFlip', prob=0., direction='horizontal'),
dict(type='RandomFlip', prob=1., direction='horizontal')
], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
])
]
train_dataloader = dict(
batch_size=2,
num_workers=4,
persistent_workers=True,
sampler=dict(type='InfiniteSampler', shuffle=True),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='training/images', seg_map_path='training/v2.0/labels'),
pipeline=train_pipeline))
val_dataloader = dict(
batch_size=1,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='validation/images',
seg_map_path='validation/v2.0/labels'),
pipeline=test_pipeline))
test_dataloader = val_dataloader
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator

View File

@@ -0,0 +1,64 @@
# dataset settings
dataset_class_name = 'MyDataset' # TODO 上一步中你定义的数据集的名字
data_root = '/home/audience/Desktop/Seg_data/Data' # TODO 数据集存储路径
# img_norm_cfg = dict(
# mean=[33.30, 35.03, 47.23], std=[48.00, 50.4, 60.51], to_rgb=True) # TODO 数据集的均值和标准差,空引用默认的,也可以网上搜代码计算
img_scale = (1920, 1080) # img_scale图像尺寸 TODO (1920,1080)
crop_size = (512, 512) # 数据增强时裁剪的大小 TODO 之后可以修改
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'), # ", reduce_zero_label=False" TODO 是否忽略0直选项
dict(type='RandomResize', scale=img_scale, ratio_range=(0.5, 2.0)),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
# dict(type='GenerateEdge', edge_width=4), # For pidnet
dict(type='PackSegInputs')
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=img_scale, keep_ratio=True),
# add loading annotation after ``Resize`` because ground truth
# does not need to do resize data transform
dict(type='LoadAnnotations'),
dict(type='PackSegInputs')
]
train_dataloader = dict( # Train dataloader config
batch_size=4, # Batch size of a single GPU TODO
num_workers=4, # Worker to pre-fetch data for each single GPU
persistent_workers=True, # Shut down the worker processes after an epoch end, which can accelerate training speed.
sampler=dict(type='DefaultSampler', shuffle=True), # Randomly shuffle during training.
dataset=dict( # Train dataset config
type=dataset_class_name, # Type of dataset, refer to mmseg/datasets/ for details.
data_root=data_root, # The root of dataset.
data_prefix=dict(
img_path='A_Ori',
seg_map_path='A_Label_GT_label_fold'),
pipeline=train_pipeline)) # Processing pipeline. This is passed by the train_pipeline created before.
val_dataloader = dict(
batch_size=1, # Batch size of a single GPU
num_workers=4, # Worker to pre-fetch data for each single GPU
persistent_workers=True, # Shut down the worker processes after an epoch end, which can accelerate testing speed.
sampler=dict(type='DefaultSampler', shuffle=False), # Not shuffle during validation and testing.
dataset=dict( # Test dataset config
type=dataset_class_name, # Type of dataset, refer to mmseg/datasets/ for details.
data_root=data_root, # The root of dataset.
data_prefix=dict(
img_path='A_Ori',
seg_map_path='A_Label_GT_label_fold'),
pipeline=test_pipeline)) # Processing pipeline. This is passed by the test_pipeline created before.
test_dataloader = dict(
batch_size=1, # Batch size of a single GPU
num_workers=4, # Worker to pre-fetch data for each single GPU
persistent_workers=True, # Shut down the worker processes after an epoch end, which can accelerate testing speed.
sampler=dict(type='DefaultSampler', shuffle=False), # Not shuffle during validation and testing.
dataset=dict( # Test dataset config
type=dataset_class_name, # Type of dataset, refer to mmseg/datasets/ for details.
data_root=data_root, # The root of dataset.
data_prefix=dict(
img_path='A_Ori',
seg_map_path='A_Label_GT_label_fold'),
pipeline=test_pipeline)) # Processing pipeline. This is passed by the test_pipeline created before.
# The metric to measure the accuracy. Here, we use IoUMetric.
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator

View File

@@ -0,0 +1,64 @@
# dataset settings
dataset_class_name = 'MyDataset_model' # TODO 上一步中你定义的数据集的名字
data_root = '/home/wkmgc/Desktop/Seg/Seg_All_In_One_MMSeg/My_Data' # TODO 数据集存储路径
# img_norm_cfg = dict(
# mean=[33.30, 35.03, 47.23], std=[48.00, 50.4, 60.51], to_rgb=True) # TODO 数据集的均值和标准差,空引用默认的,也可以网上搜代码计算
img_scale = (1920, 1080) # img_scale图像尺寸 TODO (1920,1080)
crop_size = (256, 256) # 数据增强时裁剪的大小 TODO 之后可以修改
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'), # ", reduce_zero_label=False" TODO 是否忽略0直选项
dict(type='RandomResize', scale=img_scale, ratio_range=(0.5, 2.0)),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
# dict(type='GenerateEdge', edge_width=4), # For pidnet
dict(type='PackSegInputs')
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=img_scale, keep_ratio=True),
# add loading annotation after ``Resize`` because ground truth
# does not need to do resize data transform
dict(type='LoadAnnotations'),
dict(type='PackSegInputs')
]
train_dataloader = dict( # Train dataloader config
batch_size=16, # Batch size of a single GPU TODO
num_workers=4, # Worker to pre-fetch data for each single GPU
persistent_workers=True, # Shut down the worker processes after an epoch end, which can accelerate training speed.
sampler=dict(type='DefaultSampler', shuffle=True), # Randomly shuffle during training.
dataset=dict( # Train dataset config
type=dataset_class_name, # Type of dataset, refer to mmseg/datasets/ for details.
data_root=data_root, # The root of dataset.
data_prefix=dict(
img_path='A_Ori',
seg_map_path='A_Label_GT_label_fold'),
pipeline=train_pipeline)) # Processing pipeline. This is passed by the train_pipeline created before.
val_dataloader = dict(
batch_size=1, # Batch size of a single GPU
num_workers=4, # Worker to pre-fetch data for each single GPU
persistent_workers=True, # Shut down the worker processes after an epoch end, which can accelerate testing speed.
sampler=dict(type='DefaultSampler', shuffle=False), # Not shuffle during validation and testing.
dataset=dict( # Test dataset config
type=dataset_class_name, # Type of dataset, refer to mmseg/datasets/ for details.
data_root=data_root, # The root of dataset.
data_prefix=dict(
img_path='A_Ori',
seg_map_path='A_Label_GT_label_fold'),
pipeline=test_pipeline)) # Processing pipeline. This is passed by the test_pipeline created before.
test_dataloader = dict(
batch_size=1, # Batch size of a single GPU
num_workers=4, # Worker to pre-fetch data for each single GPU
persistent_workers=True, # Shut down the worker processes after an epoch end, which can accelerate testing speed.
sampler=dict(type='DefaultSampler', shuffle=False), # Not shuffle during validation and testing.
dataset=dict( # Test dataset config
type=dataset_class_name, # Type of dataset, refer to mmseg/datasets/ for details.
data_root=data_root, # The root of dataset.
data_prefix=dict(
img_path='A_Ori',
seg_map_path='A_Label_GT_label_fold'),
pipeline=test_pipeline)) # Processing pipeline. This is passed by the test_pipeline created before.
# The metric to measure the accuracy. Here, we use IoUMetric.
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator

View File

@@ -0,0 +1,67 @@
# dataset settings
dataset_type = 'NYUDataset'
data_root = 'data/nyu'
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadDepthAnnotation', depth_rescale_factor=1e-3),
dict(type='RandomDepthMix', prob=0.25),
dict(type='RandomFlip', prob=0.5),
dict(type='RandomCrop', crop_size=(480, 480)),
dict(
type='Albu',
transforms=[
dict(type='RandomBrightnessContrast'),
dict(type='RandomGamma'),
dict(type='HueSaturationValue'),
]),
dict(
type='PackSegInputs',
meta_keys=('img_path', 'depth_map_path', 'ori_shape', 'img_shape',
'pad_shape', 'scale_factor', 'flip', 'flip_direction',
'category_id')),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=(2000, 480), keep_ratio=True),
dict(dict(type='LoadDepthAnnotation', depth_rescale_factor=1e-3)),
dict(
type='PackSegInputs',
meta_keys=('img_path', 'depth_map_path', 'ori_shape', 'img_shape',
'pad_shape', 'scale_factor', 'flip', 'flip_direction',
'category_id'))
]
train_dataloader = dict(
batch_size=8,
num_workers=8,
persistent_workers=True,
sampler=dict(type='InfiniteSampler', shuffle=True),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='images/train', depth_map_path='annotations/train'),
pipeline=train_pipeline))
val_dataloader = dict(
batch_size=1,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
test_mode=True,
data_prefix=dict(
img_path='images/test', depth_map_path='annotations/test'),
pipeline=test_pipeline))
test_dataloader = val_dataloader
val_evaluator = dict(
type='DepthMetric',
min_depth_eval=0.001,
max_depth_eval=10.0,
crop_type='nyu_crop')
test_evaluator = val_evaluator

View File

@@ -0,0 +1,72 @@
# dataset settings
dataset_type = 'NYUDataset'
data_root = 'data/nyu'
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadDepthAnnotation', depth_rescale_factor=1e-3),
dict(type='RandomDepthMix', prob=0.25),
dict(type='RandomFlip', prob=0.5),
dict(
type='RandomResize',
scale=(768, 512),
ratio_range=(0.8, 1.5),
keep_ratio=True),
dict(type='RandomCrop', crop_size=(512, 512)),
dict(
type='Albu',
transforms=[
dict(type='RandomBrightnessContrast'),
dict(type='RandomGamma'),
dict(type='HueSaturationValue'),
]),
dict(
type='PackSegInputs',
meta_keys=('img_path', 'depth_map_path', 'ori_shape', 'img_shape',
'pad_shape', 'scale_factor', 'flip', 'flip_direction',
'category_id')),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=(2048, 512), keep_ratio=True),
dict(dict(type='LoadDepthAnnotation', depth_rescale_factor=1e-3)),
dict(
type='PackSegInputs',
meta_keys=('img_path', 'depth_map_path', 'ori_shape', 'img_shape',
'pad_shape', 'scale_factor', 'flip', 'flip_direction',
'category_id'))
]
train_dataloader = dict(
batch_size=8,
num_workers=8,
persistent_workers=True,
sampler=dict(type='InfiniteSampler', shuffle=True),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='images/train', depth_map_path='annotations/train'),
pipeline=train_pipeline))
val_dataloader = dict(
batch_size=1,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
test_mode=True,
data_prefix=dict(
img_path='images/test', depth_map_path='annotations/test'),
pipeline=test_pipeline))
test_dataloader = val_dataloader
val_evaluator = dict(
type='DepthMetric',
min_depth_eval=0.001,
max_depth_eval=10.0,
crop_type='nyu_crop')
test_evaluator = val_evaluator

View File

@@ -0,0 +1,56 @@
# dataset settings
dataset_type = 'PascalContextDataset'
data_root = 'data/VOCdevkit/VOC2010/'
img_scale = (520, 520)
crop_size = (480, 480)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'),
dict(
type='RandomResize',
scale=img_scale,
ratio_range=(0.5, 2.0),
keep_ratio=True),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='PackSegInputs')
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=img_scale, keep_ratio=True),
# add loading annotation after ``Resize`` because ground truth
# does not need to do resize data transform
dict(type='LoadAnnotations'),
dict(type='PackSegInputs')
]
train_dataloader = dict(
batch_size=4,
num_workers=4,
persistent_workers=True,
sampler=dict(type='InfiniteSampler', shuffle=True),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='JPEGImages', seg_map_path='SegmentationClassContext'),
ann_file='ImageSets/SegmentationContext/train.txt',
pipeline=train_pipeline))
val_dataloader = dict(
batch_size=1,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='JPEGImages', seg_map_path='SegmentationClassContext'),
ann_file='ImageSets/SegmentationContext/val.txt',
pipeline=test_pipeline))
test_dataloader = val_dataloader
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator

View File

@@ -0,0 +1,72 @@
# dataset settings
dataset_type = 'PascalContextDataset59'
data_root = 'data/VOCdevkit/VOC2010/'
img_scale = (520, 520)
crop_size = (480, 480)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', reduce_zero_label=True),
dict(
type='RandomResize',
scale=img_scale,
ratio_range=(0.5, 2.0),
keep_ratio=True),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='PackSegInputs')
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=img_scale, keep_ratio=True),
# add loading annotation after ``Resize`` because ground truth
# does not need to do resize data transform
dict(type='LoadAnnotations', reduce_zero_label=True),
dict(type='PackSegInputs')
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
dict(type='LoadImageFromFile', backend_args=None),
dict(
type='TestTimeAug',
transforms=[
[
dict(type='Resize', scale_factor=r, keep_ratio=True)
for r in img_ratios
],
[
dict(type='RandomFlip', prob=0., direction='horizontal'),
dict(type='RandomFlip', prob=1., direction='horizontal')
], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
])
]
train_dataloader = dict(
batch_size=4,
num_workers=4,
persistent_workers=True,
sampler=dict(type='InfiniteSampler', shuffle=True),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='JPEGImages', seg_map_path='SegmentationClassContext'),
ann_file='ImageSets/SegmentationContext/train.txt',
pipeline=train_pipeline))
val_dataloader = dict(
batch_size=1,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='JPEGImages', seg_map_path='SegmentationClassContext'),
ann_file='ImageSets/SegmentationContext/val.txt',
pipeline=test_pipeline))
test_dataloader = val_dataloader
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator

View File

@@ -0,0 +1,69 @@
# dataset settings
dataset_type = 'PascalVOCDataset'
data_root = 'data/VOCdevkit/VOC2012'
crop_size = (512, 512)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'),
dict(
type='RandomResize',
scale=(2048, 512),
ratio_range=(0.5, 2.0),
keep_ratio=True),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='PackSegInputs')
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=(2048, 512), keep_ratio=True),
# add loading annotation after ``Resize`` because ground truth
# does not need to do resize data transform
dict(type='LoadAnnotations'),
dict(type='PackSegInputs')
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
dict(type='LoadImageFromFile', backend_args=None),
dict(
type='TestTimeAug',
transforms=[
[
dict(type='Resize', scale_factor=r, keep_ratio=True)
for r in img_ratios
],
[
dict(type='RandomFlip', prob=0., direction='horizontal'),
dict(type='RandomFlip', prob=1., direction='horizontal')
], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
])
]
train_dataloader = dict(
batch_size=4,
num_workers=4,
persistent_workers=True,
sampler=dict(type='InfiniteSampler', shuffle=True),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='JPEGImages', seg_map_path='SegmentationClass'),
ann_file='ImageSets/Segmentation/train.txt',
pipeline=train_pipeline))
val_dataloader = dict(
batch_size=1,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='JPEGImages', seg_map_path='SegmentationClass'),
ann_file='ImageSets/Segmentation/val.txt',
pipeline=test_pipeline))
test_dataloader = val_dataloader
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator

View File

@@ -0,0 +1,81 @@
# dataset settings
dataset_type = 'PascalVOCDataset'
data_root = 'data/VOCdevkit/VOC2012'
crop_size = (512, 512)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'),
dict(
type='RandomResize',
scale=(2048, 512),
ratio_range=(0.5, 2.0),
keep_ratio=True),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='Pad', size=crop_size),
dict(type='PackSegInputs')
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=(2048, 512), keep_ratio=True),
# add loading annotation after ``Resize`` because ground truth
# does not need to do resize data transform
dict(type='LoadAnnotations'),
dict(type='PackSegInputs')
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
dict(type='LoadImageFromFile', backend_args=None),
dict(
type='TestTimeAug',
transforms=[
[
dict(type='Resize', scale_factor=r, keep_ratio=True)
for r in img_ratios
],
[
dict(type='RandomFlip', prob=0., direction='horizontal'),
dict(type='RandomFlip', prob=1., direction='horizontal')
], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
])
]
dataset_train = dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(img_path='JPEGImages', seg_map_path='SegmentationClass'),
ann_file='ImageSets/Segmentation/train.txt',
pipeline=train_pipeline)
dataset_aug = dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='JPEGImages', seg_map_path='SegmentationClassAug'),
ann_file='ImageSets/Segmentation/aug.txt',
pipeline=train_pipeline)
train_dataloader = dict(
batch_size=4,
num_workers=4,
persistent_workers=True,
sampler=dict(type='InfiniteSampler', shuffle=True),
dataset=dict(type='ConcatDataset', datasets=[dataset_train, dataset_aug]))
val_dataloader = dict(
batch_size=1,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='JPEGImages', seg_map_path='SegmentationClass'),
ann_file='ImageSets/Segmentation/val.txt',
pipeline=test_pipeline))
test_dataloader = val_dataloader
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator

View File

@@ -0,0 +1,66 @@
# dataset settings
dataset_type = 'PotsdamDataset'
data_root = 'data/potsdam'
crop_size = (512, 512)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', reduce_zero_label=True),
dict(
type='RandomResize',
scale=(512, 512),
ratio_range=(0.5, 2.0),
keep_ratio=True),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='PackSegInputs')
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=(512, 512), keep_ratio=True),
# add loading annotation after ``Resize`` because ground truth
# does not need to do resize data transform
dict(type='LoadAnnotations', reduce_zero_label=True),
dict(type='PackSegInputs')
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
dict(type='LoadImageFromFile', backend_args=None),
dict(
type='TestTimeAug',
transforms=[
[
dict(type='Resize', scale_factor=r, keep_ratio=True)
for r in img_ratios
],
[
dict(type='RandomFlip', prob=0., direction='horizontal'),
dict(type='RandomFlip', prob=1., direction='horizontal')
], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
])
]
train_dataloader = dict(
batch_size=4,
num_workers=4,
persistent_workers=True,
sampler=dict(type='InfiniteSampler', shuffle=True),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='img_dir/train', seg_map_path='ann_dir/train'),
pipeline=train_pipeline))
val_dataloader = dict(
batch_size=1,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'),
pipeline=test_pipeline))
test_dataloader = val_dataloader
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator

View File

@@ -0,0 +1,64 @@
# dataset settings
dataset_class_name = 'PublicDataSet_AutoLaparo' # TODO 上一步中你定义的数据集的名字
data_root = '/home/wkmgc/Desktop/Seg/DataSet_Public/2_AutoLaparo-10Type-1920x1080' # TODO 数据集存储路径
# img_norm_cfg = dict(
# mean=[33.30, 35.03, 47.23], std=[48.00, 50.4, 60.51], to_rgb=True) # TODO 数据集的均值和标准差,空引用默认的,也可以网上搜代码计算
img_scale = (1920, 1080) # img_scale图像尺寸 TODO (1920,1080)
crop_size = (256, 256) # 数据增强时裁剪的大小 TODO 之后可以修改
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'), # ", reduce_zero_label=False" TODO 是否忽略0直选项
dict(type='RandomResize', scale=img_scale, ratio_range=(0.5, 2.0)),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
# dict(type='GenerateEdge', edge_width=4), # For pidnet
dict(type='PackSegInputs')
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=img_scale, keep_ratio=True),
# add loading annotation after ``Resize`` because ground truth
# does not need to do resize data transform
dict(type='LoadAnnotations'),
dict(type='PackSegInputs')
]
train_dataloader = dict( # Train dataloader config
batch_size=16, # Batch size of a single GPU TODO
num_workers=4, # Worker to pre-fetch data for each single GPU
persistent_workers=True, # Shut down the worker processes after an epoch end, which can accelerate training speed.
sampler=dict(type='DefaultSampler', shuffle=True), # Randomly shuffle during training.
dataset=dict( # Train dataset config
type=dataset_class_name, # Type of dataset, refer to mmseg/datasets/ for details.
data_root=data_root, # The root of dataset.
data_prefix=dict(
img_path='images/train',
seg_map_path='labels_GT/train'),
pipeline=train_pipeline)) # Processing pipeline. This is passed by the train_pipeline created before.
val_dataloader = dict(
batch_size=1, # Batch size of a single GPU
num_workers=4, # Worker to pre-fetch data for each single GPU
persistent_workers=True, # Shut down the worker processes after an epoch end, which can accelerate testing speed.
sampler=dict(type='DefaultSampler', shuffle=False), # Not shuffle during validation and testing.
dataset=dict( # Test dataset config
type=dataset_class_name, # Type of dataset, refer to mmseg/datasets/ for details.
data_root=data_root, # The root of dataset.
data_prefix=dict(
img_path='images/val',
seg_map_path='labels_GT/val'),
pipeline=test_pipeline)) # Processing pipeline. This is passed by the test_pipeline created before.
test_dataloader = dict(
batch_size=1, # Batch size of a single GPU
num_workers=4, # Worker to pre-fetch data for each single GPU
persistent_workers=True, # Shut down the worker processes after an epoch end, which can accelerate testing speed.
sampler=dict(type='DefaultSampler', shuffle=False), # Not shuffle during validation and testing.
dataset=dict( # Test dataset config
type=dataset_class_name, # Type of dataset, refer to mmseg/datasets/ for details.
data_root=data_root, # The root of dataset.
data_prefix=dict(
img_path='images/val',
seg_map_path='labels_GT/val'),
pipeline=test_pipeline)) # Processing pipeline. This is passed by the test_pipeline created before.
# The metric to measure the accuracy. Here, we use IoUMetric.
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator

View File

@@ -0,0 +1,64 @@
# dataset settings
dataset_class_name = 'PublicDataSet_CholecSeg8k' # TODO 上一步中你定义的数据集的名字
data_root = '/home/wkmgc/Desktop/Seg/DataSet_Public/1_CholecSeg8k-13Type-1920x1080' # TODO 数据集存储路径
# img_norm_cfg = dict(
# mean=[33.30, 35.03, 47.23], std=[48.00, 50.4, 60.51], to_rgb=True) # TODO 数据集的均值和标准差,空引用默认的,也可以网上搜代码计算
img_scale = (1920, 1080) # img_scale图像尺寸 TODO (1920,1080)
crop_size = (256, 256) # 数据增强时裁剪的大小 TODO 之后可以修改
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'), # ", reduce_zero_label=False" TODO 是否忽略0直选项
dict(type='RandomResize', scale=img_scale, ratio_range=(0.5, 2.0)),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
# dict(type='GenerateEdge', edge_width=4), # For pidnet
dict(type='PackSegInputs')
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=img_scale, keep_ratio=True),
# add loading annotation after ``Resize`` because ground truth
# does not need to do resize data transform
dict(type='LoadAnnotations'),
dict(type='PackSegInputs')
]
train_dataloader = dict( # Train dataloader config
batch_size=16, # Batch size of a single GPU TODO
num_workers=4, # Worker to pre-fetch data for each single GPU
persistent_workers=True, # Shut down the worker processes after an epoch end, which can accelerate training speed.
sampler=dict(type='DefaultSampler', shuffle=True), # Randomly shuffle during training.
dataset=dict( # Train dataset config
type=dataset_class_name, # Type of dataset, refer to mmseg/datasets/ for details.
data_root=data_root, # The root of dataset.
data_prefix=dict(
img_path='images/train',
seg_map_path='labels_GT/train'),
pipeline=train_pipeline)) # Processing pipeline. This is passed by the train_pipeline created before.
val_dataloader = dict(
batch_size=1, # Batch size of a single GPU
num_workers=4, # Worker to pre-fetch data for each single GPU
persistent_workers=True, # Shut down the worker processes after an epoch end, which can accelerate testing speed.
sampler=dict(type='DefaultSampler', shuffle=False), # Not shuffle during validation and testing.
dataset=dict( # Test dataset config
type=dataset_class_name, # Type of dataset, refer to mmseg/datasets/ for details.
data_root=data_root, # The root of dataset.
data_prefix=dict(
img_path='images/val',
seg_map_path='labels_GT/val'),
pipeline=test_pipeline)) # Processing pipeline. This is passed by the test_pipeline created before.
test_dataloader = dict(
batch_size=1, # Batch size of a single GPU
num_workers=4, # Worker to pre-fetch data for each single GPU
persistent_workers=True, # Shut down the worker processes after an epoch end, which can accelerate testing speed.
sampler=dict(type='DefaultSampler', shuffle=False), # Not shuffle during validation and testing.
dataset=dict( # Test dataset config
type=dataset_class_name, # Type of dataset, refer to mmseg/datasets/ for details.
data_root=data_root, # The root of dataset.
data_prefix=dict(
img_path='images/val',
seg_map_path='labels_GT/val'),
pipeline=test_pipeline)) # Processing pipeline. This is passed by the test_pipeline created before.
# The metric to measure the accuracy. Here, we use IoUMetric.
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator

View File

@@ -0,0 +1,64 @@
# dataset settings
dataset_class_name = 'PublicDataSet_Dresden' # TODO 上一步中你定义的数据集的名字
data_root = '/home/wkmgc/Desktop/Seg/DataSet_Public/4_Dresden-11Type-512x512' # TODO 数据集存储路径
# img_norm_cfg = dict(
# mean=[33.30, 35.03, 47.23], std=[48.00, 50.4, 60.51], to_rgb=True) # TODO 数据集的均值和标准差,空引用默认的,也可以网上搜代码计算
img_scale = (512, 512) # img_scale图像尺寸 TODO (1920,1080)
crop_size = (256, 256) # 数据增强时裁剪的大小 TODO 之后可以修改
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'), # ", reduce_zero_label=False" TODO 是否忽略0直选项
dict(type='RandomResize', scale=img_scale, ratio_range=(0.5, 2.0)),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
# dict(type='GenerateEdge', edge_width=4), # For pidnet
dict(type='PackSegInputs')
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=img_scale, keep_ratio=True),
# add loading annotation after ``Resize`` because ground truth
# does not need to do resize data transform
dict(type='LoadAnnotations'),
dict(type='PackSegInputs')
]
train_dataloader = dict( # Train dataloader config
batch_size=16, # Batch size of a single GPU TODO
num_workers=4, # Worker to pre-fetch data for each single GPU
persistent_workers=True, # Shut down the worker processes after an epoch end, which can accelerate training speed.
sampler=dict(type='DefaultSampler', shuffle=True), # Randomly shuffle during training.
dataset=dict( # Train dataset config
type=dataset_class_name, # Type of dataset, refer to mmseg/datasets/ for details.
data_root=data_root, # The root of dataset.
data_prefix=dict(
img_path='images/train',
seg_map_path='labels_GT/train'),
pipeline=train_pipeline)) # Processing pipeline. This is passed by the train_pipeline created before.
val_dataloader = dict(
batch_size=1, # Batch size of a single GPU
num_workers=4, # Worker to pre-fetch data for each single GPU
persistent_workers=True, # Shut down the worker processes after an epoch end, which can accelerate testing speed.
sampler=dict(type='DefaultSampler', shuffle=False), # Not shuffle during validation and testing.
dataset=dict( # Test dataset config
type=dataset_class_name, # Type of dataset, refer to mmseg/datasets/ for details.
data_root=data_root, # The root of dataset.
data_prefix=dict(
img_path='images/val',
seg_map_path='labels_GT/val'),
pipeline=test_pipeline)) # Processing pipeline. This is passed by the test_pipeline created before.
test_dataloader = dict(
batch_size=1, # Batch size of a single GPU
num_workers=4, # Worker to pre-fetch data for each single GPU
persistent_workers=True, # Shut down the worker processes after an epoch end, which can accelerate testing speed.
sampler=dict(type='DefaultSampler', shuffle=False), # Not shuffle during validation and testing.
dataset=dict( # Test dataset config
type=dataset_class_name, # Type of dataset, refer to mmseg/datasets/ for details.
data_root=data_root, # The root of dataset.
data_prefix=dict(
img_path='images/test',
seg_map_path='labels_GT/test'),
pipeline=test_pipeline)) # Processing pipeline. This is passed by the test_pipeline created before.
# The metric to measure the accuracy. Here, we use IoUMetric.
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator

View File

@@ -0,0 +1,64 @@
# dataset settings
dataset_class_name = 'PublicDataSet_Endovis_2017' # TODO 上一步中你定义的数据集的名字
data_root = '/home/wkmgc/Desktop/Seg/DataSet_Public/3_1_Endovis_2017-8Type-512x512' # TODO 数据集存储路径
# img_norm_cfg = dict(
# mean=[33.30, 35.03, 47.23], std=[48.00, 50.4, 60.51], to_rgb=True) # TODO 数据集的均值和标准差,空引用默认的,也可以网上搜代码计算
img_scale = (512, 512) # img_scale图像尺寸 TODO (1920,1080)
crop_size = (256, 256) # 数据增强时裁剪的大小 TODO 之后可以修改
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'), # ", reduce_zero_label=False" TODO 是否忽略0直选项
dict(type='RandomResize', scale=img_scale, ratio_range=(0.5, 2.0)),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
# dict(type='GenerateEdge', edge_width=4), # For pidnet
dict(type='PackSegInputs')
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=img_scale, keep_ratio=True),
# add loading annotation after ``Resize`` because ground truth
# does not need to do resize data transform
dict(type='LoadAnnotations'),
dict(type='PackSegInputs')
]
train_dataloader = dict( # Train dataloader config
batch_size=16, # Batch size of a single GPU TODO
num_workers=4, # Worker to pre-fetch data for each single GPU
persistent_workers=True, # Shut down the worker processes after an epoch end, which can accelerate training speed.
sampler=dict(type='DefaultSampler', shuffle=True), # Randomly shuffle during training.
dataset=dict( # Train dataset config
type=dataset_class_name, # Type of dataset, refer to mmseg/datasets/ for details.
data_root=data_root, # The root of dataset.
data_prefix=dict(
img_path='images/train',
seg_map_path='labels_GT/train'),
pipeline=train_pipeline)) # Processing pipeline. This is passed by the train_pipeline created before.
val_dataloader = dict(
batch_size=1, # Batch size of a single GPU
num_workers=4, # Worker to pre-fetch data for each single GPU
persistent_workers=True, # Shut down the worker processes after an epoch end, which can accelerate testing speed.
sampler=dict(type='DefaultSampler', shuffle=False), # Not shuffle during validation and testing.
dataset=dict( # Test dataset config
type=dataset_class_name, # Type of dataset, refer to mmseg/datasets/ for details.
data_root=data_root, # The root of dataset.
data_prefix=dict(
img_path='images/val',
seg_map_path='labels_GT/val'),
pipeline=test_pipeline)) # Processing pipeline. This is passed by the test_pipeline created before.
test_dataloader = dict(
batch_size=1, # Batch size of a single GPU
num_workers=4, # Worker to pre-fetch data for each single GPU
persistent_workers=True, # Shut down the worker processes after an epoch end, which can accelerate testing speed.
sampler=dict(type='DefaultSampler', shuffle=False), # Not shuffle during validation and testing.
dataset=dict( # Test dataset config
type=dataset_class_name, # Type of dataset, refer to mmseg/datasets/ for details.
data_root=data_root, # The root of dataset.
data_prefix=dict(
img_path='images/val',
seg_map_path='labels_GT/val'),
pipeline=test_pipeline)) # Processing pipeline. This is passed by the test_pipeline created before.
# The metric to measure the accuracy. Here, we use IoUMetric.
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator

View File

@@ -0,0 +1,64 @@
# dataset settings
dataset_class_name = 'PublicDataSet_Endovis_2018' # TODO 上一步中你定义的数据集的名字
data_root = '/home/wkmgc/Desktop/Seg/DataSet_Public/3_2_Endovis_2018-8Type-512x512' # TODO 数据集存储路径
# img_norm_cfg = dict(
# mean=[33.30, 35.03, 47.23], std=[48.00, 50.4, 60.51], to_rgb=True) # TODO 数据集的均值和标准差,空引用默认的,也可以网上搜代码计算
img_scale = (512, 512) # img_scale图像尺寸 TODO (1920,1080)
crop_size = (256, 256) # 数据增强时裁剪的大小 TODO 之后可以修改
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'), # ", reduce_zero_label=False" TODO 是否忽略0直选项
dict(type='RandomResize', scale=img_scale, ratio_range=(0.5, 2.0)),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
# dict(type='GenerateEdge', edge_width=4), # For pidnet
dict(type='PackSegInputs')
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=img_scale, keep_ratio=True),
# add loading annotation after ``Resize`` because ground truth
# does not need to do resize data transform
dict(type='LoadAnnotations'),
dict(type='PackSegInputs')
]
train_dataloader = dict( # Train dataloader config
batch_size=16, # Batch size of a single GPU TODO
num_workers=4, # Worker to pre-fetch data for each single GPU
persistent_workers=True, # Shut down the worker processes after an epoch end, which can accelerate training speed.
sampler=dict(type='DefaultSampler', shuffle=True), # Randomly shuffle during training.
dataset=dict( # Train dataset config
type=dataset_class_name, # Type of dataset, refer to mmseg/datasets/ for details.
data_root=data_root, # The root of dataset.
data_prefix=dict(
img_path='images/train',
seg_map_path='labels_GT/train'),
pipeline=train_pipeline)) # Processing pipeline. This is passed by the train_pipeline created before.
val_dataloader = dict(
batch_size=1, # Batch size of a single GPU
num_workers=4, # Worker to pre-fetch data for each single GPU
persistent_workers=True, # Shut down the worker processes after an epoch end, which can accelerate testing speed.
sampler=dict(type='DefaultSampler', shuffle=False), # Not shuffle during validation and testing.
dataset=dict( # Test dataset config
type=dataset_class_name, # Type of dataset, refer to mmseg/datasets/ for details.
data_root=data_root, # The root of dataset.
data_prefix=dict(
img_path='images/val',
seg_map_path='labels_GT/val'),
pipeline=test_pipeline)) # Processing pipeline. This is passed by the test_pipeline created before.
test_dataloader = dict(
batch_size=1, # Batch size of a single GPU
num_workers=4, # Worker to pre-fetch data for each single GPU
persistent_workers=True, # Shut down the worker processes after an epoch end, which can accelerate testing speed.
sampler=dict(type='DefaultSampler', shuffle=False), # Not shuffle during validation and testing.
dataset=dict( # Test dataset config
type=dataset_class_name, # Type of dataset, refer to mmseg/datasets/ for details.
data_root=data_root, # The root of dataset.
data_prefix=dict(
img_path='images/val',
seg_map_path='labels_GT/val'),
pipeline=test_pipeline)) # Processing pipeline. This is passed by the test_pipeline created before.
# The metric to measure the accuracy. Here, we use IoUMetric.
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator

View File

@@ -0,0 +1,90 @@
# dataset settings
dataset_type = 'REFUGEDataset'
data_root = 'data/REFUGE'
train_img_scale = (2056, 2124)
val_img_scale = (1634, 1634)
test_img_scale = (1634, 1634)
crop_size = (512, 512)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', reduce_zero_label=False),
dict(
type='RandomResize',
scale=train_img_scale,
ratio_range=(0.5, 2.0),
keep_ratio=True),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='PackSegInputs')
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=val_img_scale, keep_ratio=True),
# add loading annotation after ``Resize`` because ground truth
# does not need to do resize data transform
dict(type='LoadAnnotations', reduce_zero_label=False),
dict(type='PackSegInputs')
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=test_img_scale, keep_ratio=True),
# add loading annotation after ``Resize`` because ground truth
# does not need to do resize data transform
dict(type='LoadAnnotations', reduce_zero_label=False),
dict(type='PackSegInputs')
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
dict(type='LoadImageFromFile', backend_args=dict(backend='local')),
dict(
type='TestTimeAug',
transforms=[
[
dict(type='Resize', scale_factor=r, keep_ratio=True)
for r in img_ratios
],
[
dict(type='RandomFlip', prob=0., direction='horizontal'),
dict(type='RandomFlip', prob=1., direction='horizontal')
], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
])
]
train_dataloader = dict(
batch_size=4,
num_workers=4,
persistent_workers=True,
sampler=dict(type='InfiniteSampler', shuffle=True),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='images/training', seg_map_path='annotations/training'),
pipeline=train_pipeline))
val_dataloader = dict(
batch_size=1,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='images/validation',
seg_map_path='annotations/validation'),
pipeline=val_pipeline))
test_dataloader = dict(
batch_size=1,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='images/test', seg_map_path='annotations/test'),
pipeline=val_pipeline))
val_evaluator = dict(type='IoUMetric', iou_metrics=['mDice'])
test_evaluator = val_evaluator

View File

@@ -0,0 +1,73 @@
# dataset settings
dataset_type = 'STAREDataset'
data_root = 'data/STARE'
img_scale = (605, 700)
crop_size = (128, 128)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'),
dict(
type='RandomResize',
scale=img_scale,
ratio_range=(0.5, 2.0),
keep_ratio=True),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='PackSegInputs')
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=img_scale, keep_ratio=True),
# add loading annotation after ``Resize`` because ground truth
# does not need to do resize data transform
dict(type='LoadAnnotations'),
dict(type='PackSegInputs')
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
dict(type='LoadImageFromFile', backend_args=None),
dict(
type='TestTimeAug',
transforms=[
[
dict(type='Resize', scale_factor=r, keep_ratio=True)
for r in img_ratios
],
[
dict(type='RandomFlip', prob=0., direction='horizontal'),
dict(type='RandomFlip', prob=1., direction='horizontal')
], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
])
]
train_dataloader = dict(
batch_size=4,
num_workers=4,
persistent_workers=True,
sampler=dict(type='InfiniteSampler', shuffle=True),
dataset=dict(
type='RepeatDataset',
times=40000,
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='images/training',
seg_map_path='annotations/training'),
pipeline=train_pipeline)))
val_dataloader = dict(
batch_size=1,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='images/validation',
seg_map_path='annotations/validation'),
pipeline=test_pipeline))
test_dataloader = val_dataloader
val_evaluator = dict(type='IoUMetric', iou_metrics=['mDice'])
test_evaluator = val_evaluator

View File

@@ -0,0 +1,41 @@
dataset_type = 'SynapseDataset'
data_root = 'data/synapse/'
img_scale = (224, 224)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations'),
dict(type='Resize', scale=img_scale, keep_ratio=True),
dict(type='RandomRotFlip', rotate_prob=0.5, flip_prob=0.5, degree=20),
dict(type='PackSegInputs')
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=img_scale, keep_ratio=True),
dict(type='LoadAnnotations'),
dict(type='PackSegInputs')
]
train_dataloader = dict(
batch_size=6,
num_workers=2,
persistent_workers=True,
sampler=dict(type='InfiniteSampler', shuffle=True),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='img_dir/train', seg_map_path='ann_dir/train'),
pipeline=train_pipeline))
val_dataloader = dict(
batch_size=1,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'),
pipeline=test_pipeline))
test_dataloader = val_dataloader
val_evaluator = dict(type='IoUMetric', iou_metrics=['mDice'])
test_evaluator = val_evaluator

View File

@@ -0,0 +1,66 @@
# dataset settings
dataset_type = 'ISPRSDataset'
data_root = 'data/vaihingen'
crop_size = (512, 512)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', reduce_zero_label=True),
dict(
type='RandomResize',
scale=(512, 512),
ratio_range=(0.5, 2.0),
keep_ratio=True),
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
dict(type='RandomFlip', prob=0.5),
dict(type='PhotoMetricDistortion'),
dict(type='PackSegInputs')
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=(512, 512), keep_ratio=True),
# add loading annotation after ``Resize`` because ground truth
# does not need to do resize data transform
dict(type='LoadAnnotations', reduce_zero_label=True),
dict(type='PackSegInputs')
]
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
tta_pipeline = [
dict(type='LoadImageFromFile', backend_args=None),
dict(
type='TestTimeAug',
transforms=[
[
dict(type='Resize', scale_factor=r, keep_ratio=True)
for r in img_ratios
],
[
dict(type='RandomFlip', prob=0., direction='horizontal'),
dict(type='RandomFlip', prob=1., direction='horizontal')
], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
])
]
train_dataloader = dict(
batch_size=4,
num_workers=4,
persistent_workers=True,
sampler=dict(type='InfiniteSampler', shuffle=True),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
img_path='img_dir/train', seg_map_path='ann_dir/train'),
pipeline=train_pipeline))
val_dataloader = dict(
batch_size=1,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'),
pipeline=test_pipeline))
test_dataloader = val_dataloader
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
test_evaluator = val_evaluator

View File

@@ -0,0 +1,15 @@
default_scope = 'mmseg'
env_cfg = dict(
cudnn_benchmark=True,
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
dist_cfg=dict(backend='nccl'),
)
vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
type='SegLocalVisualizer', vis_backends=vis_backends, name='visualizer')
log_processor = dict(by_epoch=False)
log_level = 'INFO'
load_from = None
resume = False
tta_model = dict(type='SegTTAModel')

View File

@@ -0,0 +1,54 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
pretrained='open-mmlab://resnet50_v1c',
backbone=dict(
type='ResNetV1c',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
dilations=(1, 1, 2, 4),
strides=(1, 2, 1, 1),
norm_cfg=norm_cfg,
norm_eval=False,
style='pytorch',
contract_dilation=True),
decode_head=dict(
type='ANNHead',
in_channels=[1024, 2048],
in_index=[2, 3],
channels=512,
project_channels=256,
query_scales=(1, ),
key_pool_scales=(1, 3, 6, 8),
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
auxiliary_head=dict(
type='FCNHead',
in_channels=1024,
in_index=2,
channels=256,
num_convs=1,
concat_input=False,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,52 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
pretrained='open-mmlab://resnet50_v1c',
backbone=dict(
type='ResNetV1c',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
dilations=(1, 1, 2, 4),
strides=(1, 2, 1, 1),
norm_cfg=norm_cfg,
norm_eval=False,
style='pytorch',
contract_dilation=True),
decode_head=dict(
type='APCHead',
in_channels=2048,
in_index=3,
channels=512,
pool_scales=(1, 2, 3, 6),
dropout_ratio=0.1,
num_classes=19,
norm_cfg=dict(type='SyncBN', requires_grad=True),
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
auxiliary_head=dict(
type='FCNHead',
in_channels=1024,
in_index=2,
channels=256,
num_convs=1,
concat_input=False,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,78 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
backbone=dict(
type='BiSeNetV1',
in_channels=3,
context_channels=(128, 256, 512), # (512, 1024, 2048), # (512, 1024, 2048),
spatial_channels=(64, 64, 64, 128), # (256, 256, 256, 512), # (256, 256, 256, 512),
out_indices=(0, 1, 2),
out_channels=256, # 1024, # 1024,
backbone_cfg=dict(
type='ResNet',
in_channels=3,
depth=18, # 50, # 101,
num_stages=4,
out_indices=(0, 1, 2, 3),
dilations=(1, 1, 1, 1),
strides=(1, 2, 2, 2),
norm_cfg=norm_cfg,
norm_eval=False,
style='pytorch',
contract_dilation=True
# init_cfg=dict(type='Pretrained', checkpoint='open-mmlab://resnet18_v1c')
),
norm_cfg=norm_cfg,
align_corners=False,
init_cfg=None),
decode_head=dict(
type='FCNHead',
in_channels=256, # 1024 # 1024
in_index=0,
channels=256, # 1024 # 1024
num_convs=1,
concat_input=False,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
auxiliary_head=[
dict(
type='FCNHead',
in_channels=128, # 512
channels=64, # 256
num_convs=1,
num_classes=19,
in_index=1,
norm_cfg=norm_cfg,
concat_input=False,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
dict(
type='FCNHead',
in_channels=128, # 512 # 512
channels=64, # 256 # 256
num_convs=1,
num_classes=19,
in_index=2,
norm_cfg=norm_cfg,
concat_input=False,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
],
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,88 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
pretrained=None,
backbone=dict(
type='BiSeNetV2',
detail_channels=(64, 64, 128),
semantic_channels=(16, 32, 64, 128),
semantic_expansion_ratio=6,
bga_channels=128,
out_indices=(0, 1, 2, 3, 4),
init_cfg=None,
align_corners=False),
decode_head=dict(
type='FCNHead',
in_channels=128,
in_index=0,
channels=1024,
num_convs=1,
concat_input=False,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
auxiliary_head=[
dict(
type='FCNHead',
in_channels=16,
channels=16,
num_convs=2,
num_classes=19,
in_index=1,
norm_cfg=norm_cfg,
concat_input=False,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
dict(
type='FCNHead',
in_channels=32,
channels=64,
num_convs=2,
num_classes=19,
in_index=2,
norm_cfg=norm_cfg,
concat_input=False,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
dict(
type='FCNHead',
in_channels=64,
channels=256,
num_convs=2,
num_classes=19,
in_index=3,
norm_cfg=norm_cfg,
concat_input=False,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
dict(
type='FCNHead',
in_channels=128,
channels=1024,
num_convs=2,
num_classes=19,
in_index=4,
norm_cfg=norm_cfg,
concat_input=False,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
],
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,88 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
pretrained=None,
backbone=dict(
type='BiSeNetV2',
detail_channels=(128, 128, 256), # (64, 64, 128)
semantic_channels=(32, 64, 128, 256), # (16, 32, 64, 128)
semantic_expansion_ratio=6,
bga_channels=256, # 128
out_indices=(0, 1, 2, 3, 4),
init_cfg=None,
align_corners=False),
decode_head=dict(
type='FCNHead',
in_channels=256, # 128
in_index=0,
channels=1024,
num_convs=1,
concat_input=False,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
auxiliary_head=[
dict(
type='FCNHead',
in_channels=32, # 16
channels=16,
num_convs=2,
num_classes=19,
in_index=1,
norm_cfg=norm_cfg,
concat_input=False,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
dict(
type='FCNHead',
in_channels=64, # 32
channels=64,
num_convs=2,
num_classes=19,
in_index=2,
norm_cfg=norm_cfg,
concat_input=False,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
dict(
type='FCNHead',
in_channels=128, # 64
channels=256,
num_convs=2,
num_classes=19,
in_index=3,
norm_cfg=norm_cfg,
concat_input=False,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
dict(
type='FCNHead',
in_channels=256, # 128
channels=1024,
num_convs=2,
num_classes=19,
in_index=4,
norm_cfg=norm_cfg,
concat_input=False,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
],
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,52 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
pretrained='open-mmlab://resnet50_v1c',
backbone=dict(
type='ResNetV1c',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
dilations=(1, 1, 2, 4),
strides=(1, 2, 1, 1),
norm_cfg=norm_cfg,
norm_eval=False,
style='pytorch',
contract_dilation=True),
decode_head=dict(
type='CCHead',
in_channels=2048,
in_index=3,
channels=512,
recurrence=2,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
auxiliary_head=dict(
type='FCNHead',
in_channels=1024,
in_index=2,
channels=256,
num_convs=1,
concat_input=False,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,44 @@
# model settings
norm_cfg = dict(type='SyncBN', eps=1e-03, requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[72.39239876, 82.90891754, 73.15835921],
std=[1, 1, 1],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
backbone=dict(
type='CGNet',
norm_cfg=norm_cfg,
in_channels=3,
num_channels=(32, 64, 128),
num_blocks=(3, 21),
dilations=(2, 4),
reductions=(8, 16)),
decode_head=dict(
type='FCNHead',
in_channels=256,
in_index=2,
channels=256,
num_convs=0,
concat_input=False,
dropout_ratio=0,
num_classes=19,
norm_cfg=norm_cfg,
loss_decode=dict(
type='CrossEntropyLoss',
use_sigmoid=False,
loss_weight=1.0,
class_weight=[
2.5959933, 6.7415504, 3.5354059, 9.8663225, 9.690899, 9.369352,
10.289121, 9.953208, 4.3097677, 9.490387, 7.674431, 9.396905,
10.347791, 6.3927646, 10.226669, 10.241062, 10.280587,
10.396974, 10.055647
]
)),
# model training and testing settings
train_cfg=dict(sampler=None),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,52 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
pretrained='open-mmlab://resnet50_v1c',
backbone=dict(
type='ResNetV1c',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
dilations=(1, 1, 2, 4),
strides=(1, 2, 1, 1),
norm_cfg=norm_cfg,
norm_eval=False,
style='pytorch',
contract_dilation=True),
decode_head=dict(
type='DAHead',
in_channels=2048,
in_index=3,
channels=512,
pam_channels=64,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
auxiliary_head=dict(
type='FCNHead',
in_channels=1024,
in_index=2,
channels=256,
num_convs=1,
concat_input=False,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,55 @@
# The class_weight is borrowed from https://github.com/openseg-group/OCNet.pytorch/issues/14 # noqa
# Licensed under the MIT License
# class_weight = [
# 0.8373, 0.918, 0.866, 1.0345, 1.0166, 0.9969, 0.9754, 1.0489, 0.8786,
# 1.0023, 0.9539, 0.9843, 1.1116, 0.9037, 1.0865, 1.0955, 1.0865, 1.1529,
# 1.0507
# ]
checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/ddrnet/pretrain/ddrnet23-in1kpre_3rdparty-9ca29f62.pth' # noqa
# crop_size = (1024, 1024)
data_preprocessor = dict(
type='SegDataPreProcessor',
size=(1024, 1024),
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
backbone=dict(
type='DDRNet',
in_channels=3,
channels=64,
ppm_channels=128,
norm_cfg=norm_cfg,
align_corners=False,
init_cfg=dict(type='Pretrained', checkpoint=checkpoint)),
decode_head=dict(
type='DDRHead',
in_channels=64 * 4,
channels=128,
dropout_ratio=0.,
num_classes=19,
align_corners=False,
norm_cfg=norm_cfg,
loss_decode=[
dict(
type='OhemCrossEntropy',
thres=0.9,
min_kept=131072,
# class_weight=class_weight,
loss_weight=1.0),
dict(
type='OhemCrossEntropy',
thres=0.9,
min_kept=131072,
# class_weight=class_weight,s
loss_weight=0.4),
]),
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,52 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
pretrained='open-mmlab://resnet50_v1c',
backbone=dict(
type='ResNetV1c',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
dilations=(1, 1, 2, 4),
strides=(1, 2, 1, 1),
norm_cfg=norm_cfg,
norm_eval=False,
style='pytorch',
contract_dilation=True),
decode_head=dict(
type='ASPPHead',
in_channels=2048,
in_index=3,
channels=512,
dilations=(1, 12, 24, 36),
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
auxiliary_head=dict(
type='FCNHead',
in_channels=1024,
in_index=2,
channels=256,
num_convs=1,
concat_input=False,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,58 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
pretrained=None,
backbone=dict(
type='UNet',
in_channels=3,
base_channels=64,
num_stages=5,
strides=(1, 1, 1, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
dec_num_convs=(2, 2, 2, 2),
downsamples=(True, True, True, True),
enc_dilations=(1, 1, 1, 1, 1),
dec_dilations=(1, 1, 1, 1),
with_cp=False,
conv_cfg=None,
norm_cfg=norm_cfg,
act_cfg=dict(type='ReLU'),
upsample_cfg=dict(type='InterpConv'),
norm_eval=False),
decode_head=dict(
type='ASPPHead',
in_channels=64,
in_index=4,
channels=16,
dilations=(1, 12, 24, 36),
dropout_ratio=0.1,
num_classes=2,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
auxiliary_head=dict(
type='FCNHead',
in_channels=128,
in_index=3,
channels=64,
num_convs=1,
concat_input=False,
dropout_ratio=0.1,
num_classes=2,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='slide', crop_size=256, stride=170))

View File

@@ -0,0 +1,54 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
pretrained='open-mmlab://resnet50_v1c',
backbone=dict(
type='ResNetV1c',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
dilations=(1, 1, 2, 4),
strides=(1, 2, 1, 1),
norm_cfg=norm_cfg,
norm_eval=False,
style='pytorch',
contract_dilation=True),
decode_head=dict(
type='DepthwiseSeparableASPPHead',
in_channels=2048,
in_index=3,
channels=512,
dilations=(1, 12, 24, 36),
c1_in_channels=256,
c1_channels=48,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
auxiliary_head=dict(
type='FCNHead',
in_channels=1024,
in_index=2,
channels=256,
num_convs=1,
concat_input=False,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,52 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
pretrained='open-mmlab://resnet50_v1c',
backbone=dict(
type='ResNetV1c',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
dilations=(1, 1, 2, 4),
strides=(1, 2, 1, 1),
norm_cfg=norm_cfg,
norm_eval=False,
style='pytorch',
contract_dilation=True),
decode_head=dict(
type='DMHead',
in_channels=2048,
in_index=3,
channels=512,
filter_sizes=(1, 3, 5, 7),
dropout_ratio=0.1,
num_classes=19,
norm_cfg=dict(type='SyncBN', requires_grad=True),
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
auxiliary_head=dict(
type='FCNHead',
in_channels=1024,
in_index=2,
channels=256,
num_convs=1,
concat_input=False,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,54 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
pretrained='open-mmlab://resnet50_v1c',
backbone=dict(
type='ResNetV1c',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
dilations=(1, 1, 2, 4),
strides=(1, 2, 1, 1),
norm_cfg=norm_cfg,
norm_eval=False,
style='pytorch',
contract_dilation=True),
decode_head=dict(
type='DNLHead',
in_channels=2048,
in_index=3,
channels=512,
dropout_ratio=0.1,
reduction=2,
use_scale=True,
mode='embedded_gaussian',
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
auxiliary_head=dict(
type='FCNHead',
in_channels=1024,
in_index=2,
channels=256,
num_convs=1,
concat_input=False,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,39 @@
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
pretrained='pretrain/vit-b16_p16_224-80ecf9dd.pth', # noqa
backbone=dict(
type='VisionTransformer',
img_size=224,
embed_dims=768,
num_layers=12,
num_heads=12,
out_indices=(2, 5, 8, 11),
final_norm=False,
with_cls_token=True,
output_cls_token=True),
decode_head=dict(
type='DPTHead',
in_channels=(768, 768, 768, 768),
channels=256,
embed_dims=768,
post_process_channels=[96, 192, 384, 768],
num_classes=150,
readout_type='project',
input_transform='multiple_select',
in_index=(0, 1, 2, 3),
norm_cfg=norm_cfg,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
auxiliary_head=None,
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole')) # yapf: disable

View File

@@ -0,0 +1,55 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
pretrained='open-mmlab://resnet50_v1c',
backbone=dict(
type='ResNetV1c',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
dilations=(1, 1, 2, 4),
strides=(1, 2, 1, 1),
norm_cfg=norm_cfg,
norm_eval=False,
style='pytorch',
contract_dilation=True),
decode_head=dict(
type='EMAHead',
in_channels=2048,
in_index=3,
channels=256,
ema_channels=512,
num_bases=64,
num_stages=3,
momentum=0.1,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
auxiliary_head=dict(
type='FCNHead',
in_channels=1024,
in_index=2,
channels=256,
num_convs=1,
concat_input=False,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,92 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
backbone=dict(
type='EnBiSeNetV2',
in_channels=3,
detail_channels_stages=(64, 64, 128),
semantic_channels_stages=(16, 32, 64, 128),
semantic_expansion_ratio=6,
bga_channels=128,
out_indices=(0, 1, 2, 3, 4),
init_cfg=None,
align_corners=False),
decode_head=dict(
type='FCNHead',
in_channels=128, # Should match bga_channels
in_index=4, # The final output from backbone
channels=1024,
num_convs=1,
concat_input=False,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
auxiliary_head=[
dict(
type='FCNHead',
in_channels=128, # from detail branch
in_index=0,
channels=256,
num_convs=2,
concat_input=False,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
dict(
type='FCNHead',
in_channels=64, # from semantic branch stage 3
in_index=1,
channels=256,
num_convs=2,
concat_input=False,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
dict(
type='FCNHead',
in_channels=128, # from semantic branch stage 4
in_index=2,
channels=256,
num_convs=2,
concat_input=False,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
dict(
type='FCNHead',
in_channels=128, # from semantic fused output
in_index=3,
channels=256,
num_convs=2,
concat_input=False,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
],
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,56 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
pretrained='open-mmlab://resnet50_v1c',
backbone=dict(
type='ResNetV1c',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
dilations=(1, 1, 2, 4),
strides=(1, 2, 1, 1),
norm_cfg=norm_cfg,
norm_eval=False,
style='pytorch',
contract_dilation=True),
decode_head=dict(
type='EncHead',
in_channels=[512, 1024, 2048],
in_index=(1, 2, 3),
channels=512,
num_codes=32,
use_se_loss=True,
add_lateral=False,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_se_decode=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.2)),
auxiliary_head=dict(
type='FCNHead',
in_channels=1024,
in_index=2,
channels=256,
num_convs=1,
concat_input=False,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,40 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
pretrained=None,
backbone=dict(
type='ERFNet',
in_channels=3,
enc_downsample_channels=(16, 64, 128),
enc_stage_non_bottlenecks=(5, 8),
enc_non_bottleneck_dilations=(2, 4, 8, 16),
enc_non_bottleneck_channels=(64, 128),
dec_upsample_channels=(64, 16),
dec_stages_non_bottleneck=(2, 2),
dec_non_bottleneck_channels=(64, 16),
dropout_ratio=0.1,
init_cfg=None),
decode_head=dict(
type='FCNHead',
in_channels=16,
channels=128,
num_convs=1,
concat_input=False,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,65 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True, momentum=0.01)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
backbone=dict(
type='FastSCNN',
downsample_dw_channels=(32, 48),
global_in_channels=64,
global_block_channels=(64, 96, 128),
global_block_strides=(2, 2, 1),
global_out_channels=128,
higher_in_channels=64,
lower_in_channels=128,
fusion_out_channels=128,
out_indices=(0, 1, 2),
norm_cfg=norm_cfg,
align_corners=False),
decode_head=dict(
type='DepthwiseSeparableFCNHead',
in_channels=128,
channels=128,
concat_input=False,
num_classes=19,
in_index=-1,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1)),
auxiliary_head=[
dict(
type='FCNHead',
in_channels=128,
channels=32,
num_convs=1,
num_classes=19,
in_index=-2,
norm_cfg=norm_cfg,
concat_input=False,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)),
dict(
type='FCNHead',
in_channels=64,
channels=32,
num_convs=1,
num_classes=19,
in_index=-3,
norm_cfg=norm_cfg,
concat_input=False,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)),
],
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,61 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
pretrained='open-mmlab://resnet50_v1c', # 'open-mmlab://resnet18_v1c', 'open-mmlab://resnet50_v1c' 'open-mmlab://resnet101_v1c'
backbone=dict(
type='ResNetV1c',
depth=50, # 18, 50, 101,
num_stages=4,
dilations=(1, 1, 2, 4),
strides=(1, 2, 2, 2),
out_indices=(1, 2, 3),
norm_cfg=norm_cfg,
norm_eval=False,
style='pytorch',
contract_dilation=True),
neck=dict(
type='JPU',
in_channels=(512, 1024, 2048), # (128, 256, 512), (512, 1024, 2048), (512, 1024, 2048)
mid_channels=512, # 128, 512, 512
start_level=0,
end_level=-1,
dilations=(1, 2, 4, 8),
align_corners=False,
norm_cfg=norm_cfg),
decode_head=dict(
type='PSPHead',
in_channels=2048, # 512, 2048, 2048
in_index=2,
channels=512, # 128, 512, 512
pool_scales=(1, 2, 3, 6),
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
auxiliary_head=dict(
type='FCNHead',
in_channels=1024, # 256, 1024, 1024
in_index=1,
channels=256, # 64, 256, 256
num_convs=1,
concat_input=False,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,60 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
pretrained='open-mmlab://msra/hrnetv2_w18',
backbone=dict(
type='HRNet',
norm_cfg=norm_cfg,
norm_eval=False,
extra=dict(
stage1=dict(
num_modules=1,
num_branches=1,
block='BOTTLENECK',
num_blocks=(4, ),
num_channels=(64, )),
stage2=dict(
num_modules=1,
num_branches=2,
block='BASIC',
num_blocks=(4, 4),
num_channels=(18, 36)),
stage3=dict(
num_modules=4,
num_branches=3,
block='BASIC',
num_blocks=(4, 4, 4),
num_channels=(18, 36, 72)),
stage4=dict(
num_modules=3,
num_branches=4,
block='BASIC',
num_blocks=(4, 4, 4, 4),
num_channels=(18, 36, 72, 144)))),
decode_head=dict(
type='FCNHead',
in_channels=[18, 36, 72, 144],
in_index=(0, 1, 2, 3),
channels=sum([18, 36, 72, 144]),
input_transform='resize_concat',
kernel_size=1,
num_convs=1,
concat_input=False,
dropout_ratio=-1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,53 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
pretrained='open-mmlab://resnet50_v1c',
backbone=dict(
type='ResNetV1c',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
dilations=(1, 1, 2, 4),
strides=(1, 2, 1, 1),
norm_cfg=norm_cfg,
norm_eval=False,
style='pytorch',
contract_dilation=True),
decode_head=dict(
type='FCNHead',
in_channels=2048,
in_index=3,
channels=512,
num_convs=2,
concat_input=True,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
auxiliary_head=dict(
type='FCNHead',
in_channels=1024,
in_index=2,
channels=256,
num_convs=1,
concat_input=False,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,59 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
pretrained=None,
backbone=dict(
type='UNet',
in_channels=3,
base_channels=64,
num_stages=5,
strides=(1, 1, 1, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
dec_num_convs=(2, 2, 2, 2),
downsamples=(True, True, True, True),
enc_dilations=(1, 1, 1, 1, 1),
dec_dilations=(1, 1, 1, 1),
with_cp=False,
conv_cfg=None,
norm_cfg=norm_cfg,
act_cfg=dict(type='ReLU'),
upsample_cfg=dict(type='InterpConv'),
norm_eval=False),
decode_head=dict(
type='FCNHead',
in_channels=64,
in_index=4,
channels=64,
num_convs=1,
concat_input=False,
dropout_ratio=0.1,
num_classes=2,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
auxiliary_head=dict(
type='FCNHead',
in_channels=128,
in_index=3,
channels=64,
num_convs=1,
concat_input=False,
dropout_ratio=0.1,
num_classes=2,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='slide', crop_size=256, stride=170))

View File

@@ -0,0 +1,54 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/poolformer/poolformer-s12_3rdparty_32xb128_in1k_20220414-f8d83051.pth' # noqa
# TODO: delete custom_imports after mmpretrain supports auto import
# please install mmpretrain >= 1.0.0rc7
# import mmpretrain.models to trigger register_module in mmpretrain
custom_imports = dict(
imports=['mmpretrain.models'], allow_failed_imports=False)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
backbone=dict(
type='mmpretrain.PoolFormer',
arch='s12',
init_cfg=dict(
type='Pretrained', checkpoint=checkpoint_file, prefix='backbone.'),
in_patch_size=7,
in_stride=4,
in_pad=2,
down_patch_size=3,
down_stride=2,
down_pad=1,
drop_rate=0.,
drop_path_rate=0.,
out_indices=(0, 2, 4, 6),
frozen_stages=0,
),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=4),
decode_head=dict(
type='FPNHead',
in_channels=[256, 256, 256, 256],
in_index=[0, 1, 2, 3],
feature_strides=[4, 8, 16, 32],
channels=128,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,44 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
pretrained='open-mmlab://resnet50_v1c',
backbone=dict(
type='ResNetV1c',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
dilations=(1, 1, 1, 1),
strides=(1, 2, 2, 2),
norm_cfg=norm_cfg,
norm_eval=False,
style='pytorch',
contract_dilation=True),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=4),
decode_head=dict(
type='FPNHead',
in_channels=[256, 256, 256, 256],
in_index=[0, 1, 2, 3],
feature_strides=[4, 8, 16, 32],
channels=128,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,54 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
pretrained='open-mmlab://resnet50_v1c',
backbone=dict(
type='ResNetV1c',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
dilations=(1, 1, 2, 4),
strides=(1, 2, 1, 1),
norm_cfg=norm_cfg,
norm_eval=False,
style='pytorch',
contract_dilation=True),
decode_head=dict(
type='GCHead',
in_channels=2048,
in_index=3,
channels=512,
ratio=1 / 4.,
pooling_type='att',
fusion_types=('channel_add', ),
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
auxiliary_head=dict(
type='FCNHead',
in_channels=1024,
in_index=2,
channels=256,
num_convs=1,
concat_input=False,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,82 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
backbone=dict(
type='ICNet',
backbone_cfg=dict(
type='ResNetV1c',
in_channels=3,
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
dilations=(1, 1, 2, 4),
strides=(1, 2, 1, 1),
norm_cfg=norm_cfg,
norm_eval=False,
style='pytorch',
contract_dilation=True),
in_channels=3,
layer_channels=(512, 2048),
light_branch_middle_channels=32,
psp_out_channels=512,
out_channels=(64, 256, 256),
norm_cfg=norm_cfg,
align_corners=False,
),
neck=dict(
type='ICNeck',
in_channels=(64, 256, 256),
out_channels=128,
norm_cfg=norm_cfg,
align_corners=False),
decode_head=dict(
type='FCNHead',
in_channels=128,
channels=128,
num_convs=1,
in_index=2,
dropout_ratio=0,
num_classes=19,
norm_cfg=norm_cfg,
concat_input=False,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
auxiliary_head=[
dict(
type='FCNHead',
in_channels=128,
channels=128,
num_convs=1,
num_classes=19,
in_index=0,
norm_cfg=norm_cfg,
concat_input=False,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
dict(
type='FCNHead',
in_channels=128,
channels=128,
num_convs=1,
num_classes=19,
in_index=1,
norm_cfg=norm_cfg,
concat_input=False,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
],
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,53 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
pretrained='open-mmlab://resnet50_v1c',
backbone=dict(
type='ResNetV1c',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
dilations=(1, 1, 2, 4),
strides=(1, 2, 1, 1),
norm_cfg=norm_cfg,
norm_eval=False,
style='pytorch',
contract_dilation=True),
decode_head=dict(
type='ISAHead',
in_channels=2048,
in_index=3,
channels=512,
isa_channels=256,
down_factor=(8, 8),
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
auxiliary_head=dict(
type='FCNHead',
in_channels=1024,
in_index=2,
channels=256,
num_convs=1,
concat_input=False,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,83 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
# model settings
num_stages = 3
conv_kernel_size = 1
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
pretrained='open-mmlab://resnet50_v1c',
backbone=dict(
type='ResNetV1c',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
dilations=(1, 1, 2, 4),
strides=(1, 2, 1, 1),
norm_cfg=norm_cfg,
norm_eval=False,
style='pytorch',
contract_dilation=True),
decode_head=dict(
type='IterativeDecodeHead',
num_stages=num_stages,
kernel_update_head=[
dict(
type='KernelUpdateHead',
num_classes=150,
num_ffn_fcs=2,
num_heads=8,
num_mask_fcs=1,
feedforward_channels=2048,
in_channels=512,
out_channels=512,
dropout=0.0,
conv_kernel_size=conv_kernel_size,
ffn_act_cfg=dict(type='ReLU', inplace=True),
with_ffn=True,
feat_transform_cfg=dict(
conv_cfg=dict(type='Conv2d'), act_cfg=None),
kernel_updator_cfg=dict(
type='KernelUpdator',
in_channels=256,
feat_channels=256,
out_channels=256,
act_cfg=dict(type='ReLU', inplace=True),
norm_cfg=dict(type='LN'))) for _ in range(num_stages)
],
kernel_generate_head=dict(
type='ASPPHead',
# in_channels=2048,
# in_index=3,
# channels=512,
# dilations=(1, 12, 24, 36),
# dropout_ratio=0.1,
num_classes=150,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))),
auxiliary_head=dict(
type='FCNHead',
in_channels=1024,
in_index=2,
channels=256,
num_convs=1,
concat_input=False,
dropout_ratio=0.1,
num_classes=150,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,33 @@
# model settings
norm_cfg = dict(type='SyncBN', eps=0.001, requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
backbone=dict(
type='MobileNetV3',
arch='large',
out_indices=(1, 3, 16),
norm_cfg=norm_cfg),
decode_head=dict(
type='LRASPPHead',
in_channels=(16, 24, 960),
in_index=(0, 1, 2),
channels=128,
input_transform='multiple_select',
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
act_cfg=dict(type='ReLU'),
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,136 @@
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
backbone=dict(
type='SwinTransformer',
# pretrain_img_size=384,
# embed_dims=128,
# depths=[2, 2, 18, 2],
# num_heads=[4, 8, 16, 32],
# window_size=12,
# mlp_ratio=4,
# qkv_bias=True,
# qk_scale=None,
# drop_rate=0.,
# attn_drop_rate=0.,
# drop_path_rate=0.3,
# patch_norm=True,
out_indices=(0, 1, 2, 3),
# with_cp=False,
frozen_stages=-1,
init_cfg=dict(type='Pretrained', checkpoint='https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_base_patch4_window12_384_20220317-55b0104a.pth')),
decode_head=dict(
type='Mask2FormerHead',
in_channels=[128, 256, 512, 1024], # TODO
strides=[4, 8, 16, 32],
feat_channels=256,
out_channels=256,
num_classes=150, # TODO
num_queries=100,
num_transformer_feat_level=3,
align_corners=False,
pixel_decoder=dict(
type='mmdet.MSDeformAttnPixelDecoder',
num_outs=3,
norm_cfg=dict(type='GN', num_groups=32),
act_cfg=dict(type='ReLU'),
encoder=dict( # DeformableDetrTransformerEncoder
num_layers=6,
layer_cfg=dict( # DeformableDetrTransformerEncoderLayer
self_attn_cfg=dict( # MultiScaleDeformableAttention
embed_dims=256,
num_heads=8,
num_levels=3,
num_points=4,
im2col_step=64,
dropout=0.0,
batch_first=True,
norm_cfg=None,
init_cfg=None),
ffn_cfg=dict(
embed_dims=256,
feedforward_channels=1024,
num_fcs=2,
ffn_drop=0.0,
act_cfg=dict(type='ReLU', inplace=True))),
init_cfg=None),
positional_encoding=dict( # SinePositionalEncoding
num_feats=128, normalize=True),
init_cfg=None),
enforce_decoder_input_project=False,
positional_encoding=dict( # SinePositionalEncoding
num_feats=128, normalize=True),
transformer_decoder=dict( # Mask2FormerTransformerDecoder
return_intermediate=True,
num_layers=9,
layer_cfg=dict( # Mask2FormerTransformerDecoderLayer
self_attn_cfg=dict( # MultiheadAttention
embed_dims=256,
num_heads=8,
attn_drop=0.0,
proj_drop=0.0,
dropout_layer=None,
batch_first=True),
cross_attn_cfg=dict( # MultiheadAttention
embed_dims=256,
num_heads=8,
attn_drop=0.0,
proj_drop=0.0,
dropout_layer=None,
batch_first=True),
ffn_cfg=dict(
embed_dims=256,
feedforward_channels=2048,
num_fcs=2,
act_cfg=dict(type='ReLU', inplace=True),
ffn_drop=0.0,
dropout_layer=None,
add_identity=True)),
init_cfg=None),
loss_cls=dict(
type='mmdet.CrossEntropyLoss',
use_sigmoid=False,
loss_weight=2.0,
reduction='mean',
class_weight=[1.0] * 150 + [0.1]), # TODO
loss_mask=dict(
type='mmdet.CrossEntropyLoss',
use_sigmoid=True,
reduction='mean',
loss_weight=5.0),
loss_dice=dict(
type='mmdet.DiceLoss',
use_sigmoid=True,
activate=True,
reduction='mean',
naive_dice=True,
eps=1.0,
loss_weight=5.0),
train_cfg=dict(
num_points=12544,
oversample_ratio=3.0,
importance_sample_ratio=0.75,
assigner=dict(
type='mmdet.HungarianAssigner',
match_costs=[
dict(type='mmdet.ClassificationCost', weight=2.0),
dict(
type='mmdet.CrossEntropyLossCost',
weight=5.0,
use_sigmoid=True),
dict(
type='mmdet.DiceCost',
weight=5.0,
pred_act=True,
eps=1.0)
]),
sampler=dict(type='mmdet.MaskPseudoSampler'))),
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,108 @@
data_preprocessor = dict(
type='SegDataPreProcessor',
# size=crop_size,
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
# model_cfg
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
backbone=dict(
type='ResNet',
# depth=50,
# num_stages=4,
out_indices=(0, 1, 2, 3),
# dilations=(1, 1, 1, 1),
# strides=(1, 2, 2, 2),
# norm_cfg=norm_cfg,
# norm_eval=True,
# style='pytorch',
# contract_dilation=True,
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
decode_head=dict(
type='MaskFormerHead',
in_channels=[256, 512, 1024,
2048], # input channels of pixel_decoder modules
feat_channels=256,
in_index=[0, 1, 2, 3],
num_classes=150, # TODO
out_channels=256,
num_queries=100,
pixel_decoder=dict(
type='mmdet.PixelDecoder',
norm_cfg=dict(type='GN', num_groups=32),
act_cfg=dict(type='ReLU')),
enforce_decoder_input_project=False,
positional_encoding=dict( # SinePositionalEncoding
num_feats=128, normalize=True),
transformer_decoder=dict( # DetrTransformerDecoder
return_intermediate=True,
num_layers=6,
layer_cfg=dict( # DetrTransformerDecoderLayer
self_attn_cfg=dict( # MultiheadAttention
embed_dims=256,
num_heads=8,
attn_drop=0.1,
proj_drop=0.1,
dropout_layer=None,
batch_first=True),
cross_attn_cfg=dict( # MultiheadAttention
embed_dims=256,
num_heads=8,
attn_drop=0.1,
proj_drop=0.1,
dropout_layer=None,
batch_first=True),
ffn_cfg=dict(
embed_dims=256,
feedforward_channels=2048,
num_fcs=2,
act_cfg=dict(type='ReLU', inplace=True),
ffn_drop=0.1,
dropout_layer=None,
add_identity=True)),
init_cfg=None),
loss_cls=dict(
type='mmdet.CrossEntropyLoss',
use_sigmoid=False,
loss_weight=1.0,
reduction='mean',
class_weight=[1.0] * 150 + [0.1]), # TODO
loss_mask=dict(
type='mmdet.FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
reduction='mean',
loss_weight=20.0),
loss_dice=dict(
type='mmdet.DiceLoss',
use_sigmoid=True,
activate=True,
reduction='mean',
naive_dice=True,
eps=1.0,
loss_weight=1.0),
train_cfg=dict(
assigner=dict(
type='mmdet.HungarianAssigner',
match_costs=[
dict(type='mmdet.ClassificationCost', weight=1.0),
dict(
type='mmdet.FocalLossCost',
weight=20.0,
binary_input=True),
dict(
type='mmdet.DiceCost',
weight=1.0,
pred_act=True,
eps=1.0)
]),
sampler=dict(type='mmdet.MaskPseudoSampler'))),
# training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'),
)

View File

@@ -0,0 +1,88 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
pretrained=None,
backbone=dict(
type='My_BiSeNetV2_A1',
detail_channels=(64, 64, 128),
semantic_channels=(16, 32, 64, 128),
semantic_expansion_ratio=6,
bga_channels=128,
out_indices=(0, 1, 2, 3, 4),
init_cfg=None,
align_corners=False),
decode_head=dict(
type='FCNHead',
in_channels=128,
in_index=0,
channels=1024,
num_convs=1,
concat_input=False,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
auxiliary_head=[
dict(
type='FCNHead',
in_channels=16,
channels=16,
num_convs=2,
num_classes=19,
in_index=1,
norm_cfg=norm_cfg,
concat_input=False,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
dict(
type='FCNHead',
in_channels=32,
channels=64,
num_convs=2,
num_classes=19,
in_index=2,
norm_cfg=norm_cfg,
concat_input=False,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
dict(
type='FCNHead',
in_channels=64,
channels=256,
num_convs=2,
num_classes=19,
in_index=3,
norm_cfg=norm_cfg,
concat_input=False,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
dict(
type='FCNHead',
in_channels=128,
channels=1024,
num_convs=2,
num_classes=19,
in_index=4,
norm_cfg=norm_cfg,
concat_input=False,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
],
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,88 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
pretrained=None,
backbone=dict(
type='My_BiSeNetV2_A2',
detail_channels=(64, 64, 128),
semantic_channels=(16, 32, 64, 128),
semantic_expansion_ratio=6,
bga_channels=128,
out_indices=(0, 1, 2, 3, 4),
init_cfg=None,
align_corners=False),
decode_head=dict(
type='FCNHead',
in_channels=128,
in_index=0,
channels=1024,
num_convs=1,
concat_input=False,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
auxiliary_head=[
dict(
type='FCNHead',
in_channels=16,
channels=16,
num_convs=2,
num_classes=19,
in_index=1,
norm_cfg=norm_cfg,
concat_input=False,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
dict(
type='FCNHead',
in_channels=32,
channels=64,
num_convs=2,
num_classes=19,
in_index=2,
norm_cfg=norm_cfg,
concat_input=False,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
dict(
type='FCNHead',
in_channels=64,
channels=256,
num_convs=2,
num_classes=19,
in_index=3,
norm_cfg=norm_cfg,
concat_input=False,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
dict(
type='FCNHead',
in_channels=128,
channels=1024,
num_convs=2,
num_classes=19,
in_index=4,
norm_cfg=norm_cfg,
concat_input=False,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
],
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,54 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
pretrained='open-mmlab://resnet50_v1c',
backbone=dict(
type='ResNetV1c',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
dilations=(1, 1, 2, 4),
strides=(1, 2, 1, 1),
norm_cfg=norm_cfg,
norm_eval=False,
style='pytorch',
contract_dilation=True),
decode_head=dict(
type='NLHead',
in_channels=2048,
in_index=3,
channels=512,
dropout_ratio=0.1,
reduction=2,
use_scale=True,
mode='embedded_gaussian',
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
auxiliary_head=dict(
type='FCNHead',
in_channels=1024,
in_index=2,
channels=256,
num_convs=1,
concat_input=False,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,76 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='CascadeEncoderDecoder',
data_preprocessor=data_preprocessor,
num_stages=2,
pretrained='open-mmlab://msra/hrnetv2_w18',
backbone=dict(
type='HRNet',
norm_cfg=norm_cfg,
norm_eval=False,
extra=dict(
stage1=dict(
num_modules=1,
num_branches=1,
block='BOTTLENECK',
num_blocks=(4, ),
num_channels=(64, )),
stage2=dict(
num_modules=1,
num_branches=2,
block='BASIC',
num_blocks=(4, 4),
num_channels=(18, 36)),
stage3=dict(
num_modules=4,
num_branches=3,
block='BASIC',
num_blocks=(4, 4, 4),
num_channels=(18, 36, 72)),
stage4=dict(
num_modules=3,
num_branches=4,
block='BASIC',
num_blocks=(4, 4, 4, 4),
num_channels=(18, 36, 72, 144)))),
decode_head=[
dict(
type='FCNHead',
in_channels=[18, 36, 72, 144],
channels=sum([18, 36, 72, 144]),
in_index=(0, 1, 2, 3),
input_transform='resize_concat',
kernel_size=1,
num_convs=1,
concat_input=False,
dropout_ratio=-1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
dict(
type='OCRHead',
in_channels=[18, 36, 72, 144],
in_index=(0, 1, 2, 3),
input_transform='resize_concat',
channels=512,
ocr_channels=256,
dropout_ratio=-1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
],
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,55 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='CascadeEncoderDecoder',
data_preprocessor=data_preprocessor,
num_stages=2,
pretrained='open-mmlab://resnet50_v1c',
backbone=dict(
type='ResNetV1c',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
dilations=(1, 1, 2, 4),
strides=(1, 2, 1, 1),
norm_cfg=norm_cfg,
norm_eval=False,
style='pytorch',
contract_dilation=True),
decode_head=[
dict(
type='FCNHead',
in_channels=1024,
in_index=2,
channels=256,
num_convs=1,
concat_input=False,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
dict(
type='OCRHead',
in_channels=2048,
in_index=3,
channels=512,
ocr_channels=256,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))
],
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,68 @@
# The class_weight is borrowed from https://github.com/openseg-group/OCNet.pytorch/issues/14 # noqa
# Licensed under the MIT License
# 无侧重
# class_weight = [
# 0.8373, 0.918, 0.866, 1.0345, 1.0166, 0.9969, 0.9754, 1.0489, 0.8786, 1.0023,
# 0.9539, 0.9843, 1.1116, 0.9037, 1.0865, 1.0955, 1.0865, 1.1529, 1.0507
# ]
# class_weight = [
# 0.8373, 0.918, 0.866, 1.0345, 1.0166, 0.9969, 0.9754, 1.0489, 0.8786, 1.0023,
# 0.9539, 0.9843, 1.1116
# ]
checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/pidnet/pidnet-s_imagenet1k_20230306-715e6273.pth' # noqa
# crop_size = (1024, 1024)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255,
size=(1024, 1024))
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
backbone=dict(
type='PIDNet',
in_channels=3,
channels=32,
ppm_channels=96,
num_stem_blocks=2,
num_branch_blocks=3,
align_corners=False,
norm_cfg=norm_cfg,
act_cfg=dict(type='ReLU', inplace=True),
init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file)),
decode_head=dict(
type='PIDHead',
in_channels=128,
channels=128,
num_classes=19,
norm_cfg=norm_cfg,
act_cfg=dict(type='ReLU', inplace=True),
align_corners=True,
loss_decode=[
dict(
type='CrossEntropyLoss',
use_sigmoid=False,
# class_weight=class_weight,
loss_weight=0.4),
dict(
type='OhemCrossEntropy',
thres=0.9,
min_kept=131072,
# class_weight=class_weight,
loss_weight=1.0),
dict(type='BoundaryLoss', loss_weight=20.0),
dict(
type='OhemCrossEntropy',
thres=0.9,
min_kept=131072,
# class_weight=class_weight,
loss_weight=1.0)
]),
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,64 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='CascadeEncoderDecoder',
data_preprocessor=data_preprocessor,
num_stages=2,
pretrained='open-mmlab://resnet50_v1c',
backbone=dict(
type='ResNetV1c',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
dilations=(1, 1, 1, 1),
strides=(1, 2, 2, 2),
norm_cfg=norm_cfg,
norm_eval=False,
style='pytorch',
contract_dilation=True),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=4),
decode_head=[
dict(
type='FPNHead',
in_channels=[256, 256, 256, 256],
in_index=[0, 1, 2, 3],
feature_strides=[4, 8, 16, 32],
channels=128,
dropout_ratio=-1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
dict(
type='PointHead',
in_channels=[256],
in_index=[0],
channels=256,
num_fcs=3,
coarse_pred_each_layer=True,
dropout_ratio=-1,
num_classes=19,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))
],
# model training and testing settings
train_cfg=dict(
num_points=2048, oversample_ratio=3, importance_sample_ratio=0.75),
test_cfg=dict(
mode='whole',
subdivision_steps=2,
subdivision_num_points=8196,
scale_factor=2))

View File

@@ -0,0 +1,57 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
pretrained='open-mmlab://resnet50_v1c',
backbone=dict(
type='ResNetV1c',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
dilations=(1, 1, 2, 4),
strides=(1, 2, 1, 1),
norm_cfg=norm_cfg,
norm_eval=False,
style='pytorch',
contract_dilation=True),
decode_head=dict(
type='PSAHead',
in_channels=2048,
in_index=3,
channels=512,
mask_size=(97, 97),
psa_type='bi-direction',
compact=False,
shrink_factor=2,
normalization_factor=1.0,
psa_softmax=True,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
auxiliary_head=dict(
type='FCNHead',
in_channels=1024,
in_index=2,
channels=256,
num_convs=1,
concat_input=False,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,52 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
pretrained='open-mmlab://resnet50_v1c',
backbone=dict(
type='ResNetV1c',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
dilations=(1, 1, 2, 4),
strides=(1, 2, 1, 1),
norm_cfg=norm_cfg,
norm_eval=False,
style='pytorch',
contract_dilation=True),
decode_head=dict(
type='PSPHead',
in_channels=2048,
in_index=3,
channels=512,
pool_scales=(1, 2, 3, 6),
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
auxiliary_head=dict(
type='FCNHead',
in_channels=1024,
in_index=2,
channels=256,
num_convs=1,
concat_input=False,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,58 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
pretrained=None,
backbone=dict(
type='UNet',
in_channels=3,
base_channels=64,
num_stages=5,
strides=(1, 1, 1, 1, 1),
enc_num_convs=(2, 2, 2, 2, 2),
dec_num_convs=(2, 2, 2, 2),
downsamples=(True, True, True, True),
enc_dilations=(1, 1, 1, 1, 1),
dec_dilations=(1, 1, 1, 1),
with_cp=False,
conv_cfg=None,
norm_cfg=norm_cfg,
act_cfg=dict(type='ReLU'),
upsample_cfg=dict(type='InterpConv'),
norm_eval=False),
decode_head=dict(
type='PSPHead',
in_channels=64,
in_index=4,
channels=16,
pool_scales=(1, 2, 3, 6),
dropout_ratio=0.1,
num_classes=2,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
auxiliary_head=dict(
type='FCNHead',
in_channels=128,
in_index=3,
channels=64,
num_convs=1,
concat_input=False,
dropout_ratio=0.1,
num_classes=2,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='slide', crop_size=256, stride=170))

View File

@@ -0,0 +1,137 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[122.7709, 116.7460, 104.0937],
std=[68.5005, 66.6322, 70.3232],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255,
size_divisor=640,
test_cfg=dict(size_divisor=32))
num_classes = 171
model = dict(
type='MultimodalEncoderDecoder',
data_preprocessor=data_preprocessor,
pretrained='pretrain/clip_vit_base_patch16_224.pth',
asymetric_input=True,
encoder_resolution=0.5,
image_encoder=dict(
type='VisionTransformer',
img_size=(224, 224),
patch_size=16,
patch_pad=0,
in_channels=3,
embed_dims=768,
num_layers=9,
num_heads=12,
mlp_ratio=4,
out_origin=True,
out_indices=(2, 5, 8),
qkv_bias=True,
drop_rate=0.0,
attn_drop_rate=0.0,
drop_path_rate=0.0,
with_cls_token=True,
output_cls_token=True,
patch_bias=False,
pre_norm=True,
norm_cfg=dict(type='LN', eps=1e-5),
act_cfg=dict(type='QuickGELU'),
norm_eval=False,
interpolate_mode='bicubic',
frozen_exclude=['pos_embed']),
text_encoder=dict(
type='CLIPTextEncoder',
dataset_name=None,
templates='vild',
embed_dims=512,
num_layers=12,
num_heads=8,
mlp_ratio=4,
output_dims=512,
cache_feature=True,
cat_bg=True,
norm_cfg=dict(type='LN', eps=1e-5)
),
decode_head=dict(
type='SideAdapterCLIPHead',
num_classes=num_classes,
deep_supervision_idxs=[7],
san_cfg=dict(
in_channels=3,
clip_channels=768,
embed_dims=240,
patch_size=16,
patch_bias=True,
num_queries=100,
cfg_encoder=dict(
num_encode_layer=8,
num_heads=6,
mlp_ratio=4
),
fusion_index=[0, 1, 2, 3],
cfg_decoder=dict(
num_heads=12,
num_layers=1,
embed_channels=256,
mlp_channels=256,
num_mlp=3,
rescale=True),
norm_cfg=dict(type='LN', eps=1e-6),
),
maskgen_cfg=dict(
sos_token_format='cls_token',
sos_token_num=100,
cross_attn=False,
num_layers=3,
embed_dims=768,
num_heads=12,
mlp_ratio=4,
qkv_bias=True,
out_dims=512,
final_norm=True,
act_cfg=dict(type='QuickGELU'),
norm_cfg=dict(type='LN', eps=1e-5),
frozen_exclude=[]
),
align_corners=False,
train_cfg=dict(
num_points=12544,
oversample_ratio=3.0,
importance_sample_ratio=0.75,
assigner=dict(
type='HungarianAssigner',
match_costs=[
dict(type='ClassificationCost', weight=2.0),
dict(
type='CrossEntropyLossCost',
weight=5.0,
use_sigmoid=True),
dict(
type='DiceCost',
weight=5.0,
pred_act=True,
eps=1.0)
])),
loss_decode=[dict(type='CrossEntropyLoss',
loss_name='loss_cls_ce',
loss_weight=2.0,
class_weight=[1.0] * num_classes + [0.1]),
dict(type='CrossEntropyLoss',
use_sigmoid=True,
loss_name='loss_mask_ce',
loss_weight=5.0),
dict(type='DiceLoss',
ignore_index=None,
naive_dice=True,
eps=1,
loss_name='loss_mask_dice',
loss_weight=5.0)
]),
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole')) # yapf: disable

View File

@@ -0,0 +1,42 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
pretrained=None,
backbone=dict(
type='MixVisionTransformer',
in_channels=3,
embed_dims=32,
num_stages=4,
num_layers=[2, 2, 2, 2],
num_heads=[1, 2, 5, 8],
patch_sizes=[7, 3, 3, 3],
sr_ratios=[8, 4, 2, 1],
out_indices=(0, 1, 2, 3),
mlp_ratio=4,
qkv_bias=True,
drop_rate=0.0,
attn_drop_rate=0.0,
drop_path_rate=0.1),
decode_head=dict(
type='SegformerHead',
in_channels=[32, 64, 160, 256],
in_index=[0, 1, 2, 3],
channels=256,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,44 @@
checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segmenter/vit_base_p16_384_20220308-96dfe169.pth' # noqa
# model settings
backbone_norm_cfg = dict(type='LN', eps=1e-6, requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[127.5, 127.5, 127.5],
std=[127.5, 127.5, 127.5],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
pretrained=checkpoint,
backbone=dict(
type='VisionTransformer',
img_size=(512, 512),
patch_size=16,
in_channels=3,
embed_dims=768,
num_layers=12,
num_heads=12,
drop_path_rate=0.1,
attn_drop_rate=0.0,
drop_rate=0.0,
final_norm=True,
norm_cfg=backbone_norm_cfg,
with_cls_token=True,
interpolate_mode='bicubic',
),
decode_head=dict(
type='SegmenterMaskTransformerHead',
in_channels=768,
channels=768,
num_classes=150,
num_layers=2,
num_heads=12,
embed_dims=768,
dropout_ratio=0.0,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
),
test_cfg=dict(mode='slide', crop_size=(512, 512), stride=(480, 480)),
)

View File

@@ -0,0 +1,103 @@
# model settings
backbone_norm_cfg = dict(type='LN', eps=1e-6, requires_grad=True)
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
pretrained='pretrain/jx_vit_large_p16_384-b3be5167.pth',
backbone=dict(
type='VisionTransformer',
img_size=(768, 768),
patch_size=16,
in_channels=3,
embed_dims=1024,
num_layers=24,
num_heads=16,
out_indices=(5, 11, 17, 23),
drop_rate=0.1,
norm_cfg=backbone_norm_cfg,
with_cls_token=False,
interpolate_mode='bilinear',
),
neck=dict(
type='MLANeck',
in_channels=[1024, 1024, 1024, 1024],
out_channels=256,
norm_cfg=norm_cfg,
act_cfg=dict(type='ReLU'),
),
decode_head=dict(
type='SETRMLAHead',
in_channels=(256, 256, 256, 256),
channels=512,
in_index=(0, 1, 2, 3),
dropout_ratio=0,
mla_channels=128,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
auxiliary_head=[
dict(
type='FCNHead',
in_channels=256,
channels=256,
in_index=0,
dropout_ratio=0,
num_convs=0,
kernel_size=1,
concat_input=False,
num_classes=19,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
dict(
type='FCNHead',
in_channels=256,
channels=256,
in_index=1,
dropout_ratio=0,
num_convs=0,
kernel_size=1,
concat_input=False,
num_classes=19,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
dict(
type='FCNHead',
in_channels=256,
channels=256,
in_index=2,
dropout_ratio=0,
num_convs=0,
kernel_size=1,
concat_input=False,
num_classes=19,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
dict(
type='FCNHead',
in_channels=256,
channels=256,
in_index=3,
dropout_ratio=0,
num_convs=0,
kernel_size=1,
concat_input=False,
num_classes=19,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
],
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,88 @@
# model settings
backbone_norm_cfg = dict(type='LN', eps=1e-6, requires_grad=True)
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
pretrained='pretrain/jx_vit_large_p16_384-b3be5167.pth',
backbone=dict(
type='VisionTransformer',
img_size=(768, 768),
patch_size=16,
in_channels=3,
embed_dims=1024,
num_layers=24,
num_heads=16,
out_indices=(9, 14, 19, 23),
drop_rate=0.1,
norm_cfg=backbone_norm_cfg,
with_cls_token=True,
interpolate_mode='bilinear',
),
decode_head=dict(
type='SETRUPHead',
in_channels=1024,
channels=256,
in_index=3,
num_classes=19,
dropout_ratio=0,
norm_cfg=norm_cfg,
num_convs=1,
up_scale=4,
kernel_size=1,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
auxiliary_head=[
dict(
type='SETRUPHead',
in_channels=1024,
channels=256,
in_index=0,
num_classes=19,
dropout_ratio=0,
norm_cfg=norm_cfg,
num_convs=1,
up_scale=4,
kernel_size=1,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
dict(
type='SETRUPHead',
in_channels=1024,
channels=256,
in_index=1,
num_classes=19,
dropout_ratio=0,
norm_cfg=norm_cfg,
num_convs=1,
up_scale=4,
kernel_size=1,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
dict(
type='SETRUPHead',
in_channels=1024,
channels=256,
in_index=2,
num_classes=19,
dropout_ratio=0,
norm_cfg=norm_cfg,
num_convs=1,
up_scale=4,
kernel_size=1,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4))
],
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,88 @@
# model settings
backbone_norm_cfg = dict(type='LN', eps=1e-6, requires_grad=True)
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
pretrained='pretrain/jx_vit_large_p16_384-b3be5167.pth',
backbone=dict(
type='VisionTransformer',
img_size=(768, 768),
patch_size=16,
in_channels=3,
embed_dims=1024,
num_layers=24,
num_heads=16,
out_indices=(9, 14, 19, 23),
drop_rate=0.1,
norm_cfg=backbone_norm_cfg,
with_cls_token=True,
interpolate_mode='bilinear',
),
decode_head=dict(
type='SETRUPHead',
in_channels=1024,
channels=256,
in_index=3,
num_classes=19,
dropout_ratio=0,
norm_cfg=norm_cfg,
num_convs=4,
up_scale=2,
kernel_size=3,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
auxiliary_head=[
dict(
type='SETRUPHead',
in_channels=1024,
channels=256,
in_index=0,
num_classes=19,
dropout_ratio=0,
norm_cfg=norm_cfg,
num_convs=1,
up_scale=4,
kernel_size=3,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
dict(
type='SETRUPHead',
in_channels=1024,
channels=256,
in_index=1,
num_classes=19,
dropout_ratio=0,
norm_cfg=norm_cfg,
num_convs=1,
up_scale=4,
kernel_size=3,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
dict(
type='SETRUPHead',
in_channels=1024,
channels=256,
in_index=2,
num_classes=19,
dropout_ratio=0,
norm_cfg=norm_cfg,
num_convs=1,
up_scale=4,
kernel_size=3,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
],
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,91 @@
norm_cfg = dict(type='BN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
pretrained=None,
backbone=dict(
type='STDCContextPathNet',
backbone_cfg=dict(
type='STDCNet',
stdc_type='STDCNet1',
in_channels=3,
channels=(32, 64, 256, 512, 1024),
bottleneck_type='cat',
num_convs=4,
norm_cfg=norm_cfg,
act_cfg=dict(type='ReLU'),
with_final_conv=False),
last_in_channels=(1024, 512),
out_channels=128,
ffm_cfg=dict(in_channels=384, out_channels=256, scale_factor=4)),
decode_head=dict(
type='FCNHead',
in_channels=256,
channels=256,
num_convs=1,
num_classes=19,
in_index=3,
concat_input=False,
dropout_ratio=0.1,
norm_cfg=norm_cfg,
align_corners=True,
sampler=dict(type='OHEMPixelSampler', thresh=0.7, min_kept=10000),
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
auxiliary_head=[
dict(
type='FCNHead',
in_channels=128,
channels=64,
num_convs=1,
num_classes=19,
in_index=2,
norm_cfg=norm_cfg,
concat_input=False,
align_corners=False,
sampler=dict(type='OHEMPixelSampler', thresh=0.7, min_kept=10000),
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
dict(
type='FCNHead',
in_channels=128,
channels=64,
num_convs=1,
num_classes=19,
in_index=1,
norm_cfg=norm_cfg,
concat_input=False,
align_corners=False,
sampler=dict(type='OHEMPixelSampler', thresh=0.7, min_kept=10000),
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
dict(
type='STDCHead',
in_channels=256,
channels=64,
num_convs=1,
num_classes=2,
boundary_threshold=0.1,
in_index=0,
norm_cfg=norm_cfg,
concat_input=False,
align_corners=True,
loss_decode=[
dict(
type='CrossEntropyLoss',
loss_name='loss_ce',
use_sigmoid=True,
loss_weight=1.0),
dict(type='DiceLoss', loss_name='loss_dice', loss_weight=1.0)
]),
],
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,53 @@
checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/pcpvt_small_20220308-e638c41c.pth' # noqa
# model settings
backbone_norm_cfg = dict(type='LN')
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
backbone=dict(
type='PCPVT',
init_cfg=dict(type='Pretrained', checkpoint=checkpoint),
in_channels=3,
embed_dims=[64, 128, 320, 512],
num_heads=[1, 2, 5, 8],
patch_sizes=[4, 2, 2, 2],
strides=[4, 2, 2, 2],
mlp_ratios=[8, 8, 4, 4],
out_indices=(0, 1, 2, 3),
qkv_bias=True,
norm_cfg=backbone_norm_cfg,
depths=[3, 4, 6, 3],
sr_ratios=[8, 4, 2, 1],
norm_after_stage=False,
drop_rate=0.0,
attn_drop_rate=0.,
drop_path_rate=0.2),
neck=dict(
type='FPN',
in_channels=[64, 128, 320, 512],
out_channels=256,
num_outs=4),
decode_head=dict(
type='FPNHead',
in_channels=[256, 256, 256, 256],
in_index=[0, 1, 2, 3],
feature_strides=[4, 8, 16, 32],
channels=128,
dropout_ratio=0.1,
num_classes=150,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,61 @@
checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/pcpvt_small_20220308-e638c41c.pth' # noqa
# model settings
backbone_norm_cfg = dict(type='LN')
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
backbone=dict(
type='PCPVT',
init_cfg=dict(type='Pretrained', checkpoint=checkpoint),
in_channels=3,
embed_dims=[64, 128, 320, 512],
num_heads=[1, 2, 5, 8],
patch_sizes=[4, 2, 2, 2],
strides=[4, 2, 2, 2],
mlp_ratios=[8, 8, 4, 4],
out_indices=(0, 1, 2, 3),
qkv_bias=True,
norm_cfg=backbone_norm_cfg,
depths=[3, 4, 6, 3],
sr_ratios=[8, 4, 2, 1],
norm_after_stage=False,
drop_rate=0.0,
attn_drop_rate=0.,
drop_path_rate=0.2),
decode_head=dict(
type='UPerHead',
in_channels=[64, 128, 320, 512],
in_index=[0, 1, 2, 3],
pool_scales=(1, 2, 3, 6),
channels=512,
dropout_ratio=0.1,
num_classes=150,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
auxiliary_head=dict(
type='FCNHead',
in_channels=320,
in_index=2,
channels=256,
num_convs=1,
concat_input=False,
dropout_ratio=0.1,
num_classes=150,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,58 @@
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
pretrained=None,
backbone=dict(
type='BEiT',
img_size=(640, 640),
patch_size=16,
in_channels=3,
embed_dims=768,
num_layers=12,
num_heads=12,
mlp_ratio=4,
out_indices=(3, 5, 7, 11),
qv_bias=True,
attn_drop_rate=0.0,
drop_path_rate=0.1,
norm_cfg=dict(type='LN', eps=1e-6),
act_cfg=dict(type='GELU'),
norm_eval=False,
init_values=0.1),
neck=dict(type='Feature2Pyramid', embed_dim=768, rescales=[4, 2, 1, 0.5]),
decode_head=dict(
type='UPerHead',
in_channels=[768, 768, 768, 768],
in_index=[0, 1, 2, 3],
pool_scales=(1, 2, 3, 6),
channels=768,
dropout_ratio=0.1,
num_classes=150,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
auxiliary_head=dict(
type='FCNHead',
in_channels=768,
in_index=2,
channels=256,
num_convs=1,
concat_input=False,
dropout_ratio=0.1,
num_classes=150,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,52 @@
norm_cfg = dict(type='SyncBN', requires_grad=True)
custom_imports = dict(imports='mmpretrain.models', allow_failed_imports=False)
checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-base_3rdparty_32xb128-noema_in1k_20220301-2a0ee547.pth' # noqa
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
pretrained=None,
backbone=dict(
type='mmpretrain.ConvNeXt',
arch='base',
out_indices=[0, 1, 2, 3],
drop_path_rate=0.4,
layer_scale_init_value=1.0,
gap_before_final_norm=False,
init_cfg=dict(
type='Pretrained', checkpoint=checkpoint_file,
prefix='backbone.')),
decode_head=dict(
type='UPerHead',
in_channels=[128, 256, 512, 1024],
in_index=[0, 1, 2, 3],
pool_scales=(1, 2, 3, 6),
channels=512,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
auxiliary_head=dict(
type='FCNHead',
in_channels=384,
in_index=2,
channels=256,
num_convs=1,
concat_input=False,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,57 @@
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
pretrained=None,
backbone=dict(
type='MAE',
img_size=(640, 640),
patch_size=16,
in_channels=3,
embed_dims=768,
num_layers=12,
num_heads=12,
mlp_ratio=4,
out_indices=(3, 5, 7, 11),
attn_drop_rate=0.0,
drop_path_rate=0.1,
norm_cfg=dict(type='LN', eps=1e-6),
act_cfg=dict(type='GELU'),
norm_eval=False,
init_values=0.1),
neck=dict(type='Feature2Pyramid', embed_dim=768, rescales=[4, 2, 1, 0.5]),
decode_head=dict(
type='UPerHead',
in_channels=[384, 384, 384, 384],
in_index=[0, 1, 2, 3],
pool_scales=(1, 2, 3, 6),
channels=512,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
auxiliary_head=dict(
type='FCNHead',
in_channels=384,
in_index=2,
channels=256,
num_convs=1,
concat_input=False,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,52 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
pretrained='open-mmlab://resnet50_v1c',
backbone=dict(
type='ResNetV1c',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
dilations=(1, 1, 1, 1),
strides=(1, 2, 2, 2),
norm_cfg=norm_cfg,
norm_eval=False,
style='pytorch',
contract_dilation=True),
decode_head=dict(
type='UPerHead',
in_channels=[256, 512, 1024, 2048],
in_index=[0, 1, 2, 3],
pool_scales=(1, 2, 3, 6),
channels=512,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
auxiliary_head=dict(
type='FCNHead',
in_channels=1024,
in_index=2,
channels=256,
num_convs=1,
concat_input=False,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,62 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
backbone_norm_cfg = dict(type='LN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
pretrained=None,
backbone=dict(
type='SwinTransformer',
pretrain_img_size=224,
embed_dims=96,
patch_size=4,
window_size=7,
mlp_ratio=4,
depths=[2, 2, 6, 2],
num_heads=[3, 6, 12, 24],
strides=(4, 2, 2, 2),
out_indices=(0, 1, 2, 3),
qkv_bias=True,
qk_scale=None,
patch_norm=True,
drop_rate=0.,
attn_drop_rate=0.,
drop_path_rate=0.3,
use_abs_pos_embed=False,
act_cfg=dict(type='GELU'),
norm_cfg=backbone_norm_cfg),
decode_head=dict(
type='UPerHead',
in_channels=[96, 192, 384, 768],
in_index=[0, 1, 2, 3],
pool_scales=(1, 2, 3, 6),
channels=512,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
auxiliary_head=dict(
type='FCNHead',
in_channels=384,
in_index=2,
channels=256,
num_convs=1,
concat_input=False,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole'))

View File

@@ -0,0 +1,65 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=255)
model = dict(
type='EncoderDecoder',
data_preprocessor=data_preprocessor,
pretrained='pretrain/jx_vit_base_p16_224-80ecf9dd.pth',
backbone=dict(
type='VisionTransformer',
img_size=(512, 512),
patch_size=16,
in_channels=3,
embed_dims=768,
num_layers=12,
num_heads=12,
mlp_ratio=4,
out_indices=(2, 5, 8, 11),
qkv_bias=True,
drop_rate=0.0,
attn_drop_rate=0.0,
drop_path_rate=0.0,
with_cls_token=True,
norm_cfg=dict(type='LN', eps=1e-6),
act_cfg=dict(type='GELU'),
norm_eval=False,
interpolate_mode='bicubic'),
neck=dict(
type='MultiLevelNeck',
in_channels=[768, 768, 768, 768],
out_channels=768,
scales=[4, 2, 1, 0.5]),
decode_head=dict(
type='UPerHead',
in_channels=[768, 768, 768, 768],
in_index=[0, 1, 2, 3],
pool_scales=(1, 2, 3, 6),
channels=512,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
auxiliary_head=dict(
type='FCNHead',
in_channels=768,
in_index=3,
channels=256,
num_convs=1,
concat_input=False,
dropout_ratio=0.1,
num_classes=19,
norm_cfg=norm_cfg,
align_corners=False,
loss_decode=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
# model training and testing settings
train_cfg=dict(),
test_cfg=dict(mode='whole')) # yapf: disable

View File

@@ -0,0 +1,86 @@
# model settings
data_preprocessor = dict(
type='SegDataPreProcessor',
mean=[127.5, 127.5, 127.5],
std=[127.5, 127.5, 127.5],
bgr_to_rgb=True,
pad_val=0,
seg_pad_val=0)
# adapted from stable-diffusion/configs/stable-diffusion/v1-inference.yaml
stable_diffusion_cfg = dict(
base_learning_rate=0.0001,
target='ldm.models.diffusion.ddpm.LatentDiffusion',
checkpoint='https://download.openmmlab.com/mmsegmentation/v0.5/'
'vpd/stable_diffusion_v1-5_pretrain_third_party.pth',
params=dict(
linear_start=0.00085,
linear_end=0.012,
num_timesteps_cond=1,
log_every_t=200,
timesteps=1000,
first_stage_key='jpg',
cond_stage_key='txt',
image_size=64,
channels=4,
cond_stage_trainable=False,
conditioning_key='crossattn',
monitor='val/loss_simple_ema',
scale_factor=0.18215,
use_ema=False,
scheduler_config=dict(
target='ldm.lr_scheduler.LambdaLinearScheduler',
params=dict(
warm_up_steps=[10000],
cycle_lengths=[10000000000000],
f_start=[1e-06],
f_max=[1.0],
f_min=[1.0])),
unet_config=dict(
target='ldm.modules.diffusionmodules.openaimodel.UNetModel',
params=dict(
image_size=32,
in_channels=4,
out_channels=4,
model_channels=320,
attention_resolutions=[4, 2, 1],
num_res_blocks=2,
channel_mult=[1, 2, 4, 4],
num_heads=8,
use_spatial_transformer=True,
transformer_depth=1,
context_dim=768,
use_checkpoint=True,
legacy=False)),
first_stage_config=dict(
target='ldm.models.autoencoder.AutoencoderKL',
params=dict(
embed_dim=4,
monitor='val/rec_loss',
ddconfig=dict(
double_z=True,
z_channels=4,
resolution=256,
in_channels=3,
out_ch=3,
ch=128,
ch_mult=[1, 2, 4, 4],
num_res_blocks=2,
attn_resolutions=[],
dropout=0.0),
lossconfig=dict(target='torch.nn.Identity'))),
cond_stage_config=dict(
target='ldm.modules.encoders.modules.AbstractEncoder')))
model = dict(
type='DepthEstimator',
data_preprocessor=data_preprocessor,
backbone=dict(
type='VPD',
diffusion_cfg=stable_diffusion_cfg,
),
)
# some of the parameters in stable-diffusion model will not be updated
# during training
find_unused_parameters = True

View File

@@ -0,0 +1,25 @@
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None)
# learning policy
param_scheduler = [
dict(
type='PolyLR',
eta_min=1e-4,
power=0.9,
begin=0,
end=160000,
by_epoch=False)
]
# training schedule for 160k
train_cfg = dict(
type='IterBasedTrainLoop', max_iters=160000, val_interval=16000)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
default_hooks = dict(
timer=dict(type='IterTimerHook'),
logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False),
param_scheduler=dict(type='ParamSchedulerHook'),
checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=16000),
sampler_seed=dict(type='DistSamplerSeedHook'),
visualization=dict(type='SegVisualizationHook'))

View File

@@ -0,0 +1,25 @@
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None)
# learning policy
param_scheduler = [
dict(
type='PolyLR',
eta_min=1e-4,
power=0.9,
begin=0,
end=160000,
by_epoch=False)
]
# training schedule for 160k
train_cfg = dict(
type='IterBasedTrainLoop', max_iters=160000, val_interval=10000)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
default_hooks = dict(
timer=dict(type='IterTimerHook'),
logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False),
param_scheduler=dict(type='ParamSchedulerHook'),
checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=10000),
sampler_seed=dict(type='DistSamplerSeedHook'),
visualization=dict(type='SegVisualizationHook'))

Some files were not shown because too many files have changed in this diff Show More