first commit
This commit is contained in:
68
Seg_All_In_One_MMSeg/configs/_base_/datasets/ade20k.py
Normal file
68
Seg_All_In_One_MMSeg/configs/_base_/datasets/ade20k.py
Normal file
@@ -0,0 +1,68 @@
|
||||
# dataset settings
|
||||
dataset_type = 'ADE20KDataset'
|
||||
data_root = 'data/ade/ADEChallengeData2016'
|
||||
crop_size = (512, 512)
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='LoadAnnotations', reduce_zero_label=True),
|
||||
dict(
|
||||
type='RandomResize',
|
||||
scale=(2048, 512),
|
||||
ratio_range=(0.5, 2.0),
|
||||
keep_ratio=True),
|
||||
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
|
||||
dict(type='RandomFlip', prob=0.5),
|
||||
dict(type='PhotoMetricDistortion'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='Resize', scale=(2048, 512), keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
dict(type='LoadAnnotations', reduce_zero_label=True),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
|
||||
tta_pipeline = [
|
||||
dict(type='LoadImageFromFile', backend_args=None),
|
||||
dict(
|
||||
type='TestTimeAug',
|
||||
transforms=[
|
||||
[
|
||||
dict(type='Resize', scale_factor=r, keep_ratio=True)
|
||||
for r in img_ratios
|
||||
],
|
||||
[
|
||||
dict(type='RandomFlip', prob=0., direction='horizontal'),
|
||||
dict(type='RandomFlip', prob=1., direction='horizontal')
|
||||
], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
|
||||
])
|
||||
]
|
||||
train_dataloader = dict(
|
||||
batch_size=4,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='InfiniteSampler', shuffle=True),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(
|
||||
img_path='images/training', seg_map_path='annotations/training'),
|
||||
pipeline=train_pipeline))
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(
|
||||
img_path='images/validation',
|
||||
seg_map_path='annotations/validation'),
|
||||
pipeline=test_pipeline))
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
|
||||
test_evaluator = val_evaluator
|
||||
@@ -0,0 +1,68 @@
|
||||
# dataset settings
|
||||
dataset_type = 'ADE20KDataset'
|
||||
data_root = 'data/ade/ADEChallengeData2016'
|
||||
crop_size = (640, 640)
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='LoadAnnotations', reduce_zero_label=True),
|
||||
dict(
|
||||
type='RandomResize',
|
||||
scale=(2560, 640),
|
||||
ratio_range=(0.5, 2.0),
|
||||
keep_ratio=True),
|
||||
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
|
||||
dict(type='RandomFlip', prob=0.5),
|
||||
dict(type='PhotoMetricDistortion'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='Resize', scale=(2560, 640), keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
dict(type='LoadAnnotations', reduce_zero_label=True),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
|
||||
tta_pipeline = [
|
||||
dict(type='LoadImageFromFile', backend_args=None),
|
||||
dict(
|
||||
type='TestTimeAug',
|
||||
transforms=[
|
||||
[
|
||||
dict(type='Resize', scale_factor=r, keep_ratio=True)
|
||||
for r in img_ratios
|
||||
],
|
||||
[
|
||||
dict(type='RandomFlip', prob=0., direction='horizontal'),
|
||||
dict(type='RandomFlip', prob=1., direction='horizontal')
|
||||
], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
|
||||
])
|
||||
]
|
||||
train_dataloader = dict(
|
||||
batch_size=4,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='InfiniteSampler', shuffle=True),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(
|
||||
img_path='images/training', seg_map_path='annotations/training'),
|
||||
pipeline=train_pipeline))
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(
|
||||
img_path='images/validation',
|
||||
seg_map_path='annotations/validation'),
|
||||
pipeline=test_pipeline))
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
|
||||
test_evaluator = val_evaluator
|
||||
70
Seg_All_In_One_MMSeg/configs/_base_/datasets/bdd100k.py
Normal file
70
Seg_All_In_One_MMSeg/configs/_base_/datasets/bdd100k.py
Normal file
@@ -0,0 +1,70 @@
|
||||
# dataset settings
|
||||
dataset_type = 'BDD100KDataset'
|
||||
data_root = 'data/bdd100k/'
|
||||
|
||||
crop_size = (512, 1024)
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(
|
||||
type='RandomResize',
|
||||
scale=(2048, 1024),
|
||||
ratio_range=(0.5, 2.0),
|
||||
keep_ratio=True),
|
||||
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
|
||||
dict(type='RandomFlip', prob=0.5),
|
||||
dict(type='PhotoMetricDistortion'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='Resize', scale=(2048, 1024), keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
|
||||
tta_pipeline = [
|
||||
dict(type='LoadImageFromFile', backend_args=None),
|
||||
dict(
|
||||
type='TestTimeAug',
|
||||
transforms=[
|
||||
[
|
||||
dict(type='Resize', scale_factor=r, keep_ratio=True)
|
||||
for r in img_ratios
|
||||
],
|
||||
[
|
||||
dict(type='RandomFlip', prob=0., direction='horizontal'),
|
||||
dict(type='RandomFlip', prob=1., direction='horizontal')
|
||||
], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
|
||||
])
|
||||
]
|
||||
train_dataloader = dict(
|
||||
batch_size=2,
|
||||
num_workers=2,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='InfiniteSampler', shuffle=True),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(
|
||||
img_path='images/10k/train',
|
||||
seg_map_path='labels/sem_seg/masks/train'),
|
||||
pipeline=train_pipeline))
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(
|
||||
img_path='images/10k/val',
|
||||
seg_map_path='labels/sem_seg/masks/val'),
|
||||
pipeline=test_pipeline))
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
|
||||
test_evaluator = val_evaluator
|
||||
75
Seg_All_In_One_MMSeg/configs/_base_/datasets/chase_db1.py
Normal file
75
Seg_All_In_One_MMSeg/configs/_base_/datasets/chase_db1.py
Normal file
@@ -0,0 +1,75 @@
|
||||
# dataset settings
|
||||
dataset_type = 'ChaseDB1Dataset'
|
||||
data_root = 'data/CHASE_DB1'
|
||||
img_scale = (960, 999)
|
||||
crop_size = (128, 128)
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(
|
||||
type='RandomResize',
|
||||
scale=img_scale,
|
||||
ratio_range=(0.5, 2.0),
|
||||
keep_ratio=True),
|
||||
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
|
||||
dict(type='RandomFlip', prob=0.5),
|
||||
dict(type='PhotoMetricDistortion'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='Resize', scale=img_scale, keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
|
||||
tta_pipeline = [
|
||||
dict(type='LoadImageFromFile', backend_args=None),
|
||||
dict(
|
||||
type='TestTimeAug',
|
||||
transforms=[
|
||||
[
|
||||
dict(type='Resize', scale_factor=r, keep_ratio=True)
|
||||
for r in img_ratios
|
||||
],
|
||||
[
|
||||
dict(type='RandomFlip', prob=0., direction='horizontal'),
|
||||
dict(type='RandomFlip', prob=1., direction='horizontal')
|
||||
], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
|
||||
])
|
||||
]
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=4,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='InfiniteSampler', shuffle=True),
|
||||
dataset=dict(
|
||||
type='RepeatDataset',
|
||||
times=40000,
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(
|
||||
img_path='images/training',
|
||||
seg_map_path='annotations/training'),
|
||||
pipeline=train_pipeline)))
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(
|
||||
img_path='images/validation',
|
||||
seg_map_path='annotations/validation'),
|
||||
pipeline=test_pipeline))
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
val_evaluator = dict(type='IoUMetric', iou_metrics=['mDice'])
|
||||
test_evaluator = val_evaluator
|
||||
67
Seg_All_In_One_MMSeg/configs/_base_/datasets/cityscapes.py
Normal file
67
Seg_All_In_One_MMSeg/configs/_base_/datasets/cityscapes.py
Normal file
@@ -0,0 +1,67 @@
|
||||
# dataset settings
|
||||
dataset_type = 'CityscapesDataset'
|
||||
data_root = 'data/cityscapes/'
|
||||
crop_size = (512, 1024)
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(
|
||||
type='RandomResize',
|
||||
scale=(2048, 1024),
|
||||
ratio_range=(0.5, 2.0),
|
||||
keep_ratio=True),
|
||||
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
|
||||
dict(type='RandomFlip', prob=0.5),
|
||||
dict(type='PhotoMetricDistortion'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='Resize', scale=(2048, 1024), keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
|
||||
tta_pipeline = [
|
||||
dict(type='LoadImageFromFile', backend_args=None),
|
||||
dict(
|
||||
type='TestTimeAug',
|
||||
transforms=[
|
||||
[
|
||||
dict(type='Resize', scale_factor=r, keep_ratio=True)
|
||||
for r in img_ratios
|
||||
],
|
||||
[
|
||||
dict(type='RandomFlip', prob=0., direction='horizontal'),
|
||||
dict(type='RandomFlip', prob=1., direction='horizontal')
|
||||
], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
|
||||
])
|
||||
]
|
||||
train_dataloader = dict(
|
||||
batch_size=2,
|
||||
num_workers=2,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='InfiniteSampler', shuffle=True),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(
|
||||
img_path='leftImg8bit/train', seg_map_path='gtFine/train'),
|
||||
pipeline=train_pipeline))
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(
|
||||
img_path='leftImg8bit/val', seg_map_path='gtFine/val'),
|
||||
pipeline=test_pipeline))
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
|
||||
test_evaluator = val_evaluator
|
||||
@@ -0,0 +1,29 @@
|
||||
_base_ = './cityscapes.py'
|
||||
crop_size = (1024, 1024)
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(
|
||||
type='RandomResize',
|
||||
scale=(2048, 1024),
|
||||
ratio_range=(0.5, 2.0),
|
||||
keep_ratio=True),
|
||||
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
|
||||
dict(type='RandomFlip', prob=0.5),
|
||||
dict(type='PhotoMetricDistortion'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='Resize', scale=(2048, 1024), keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
|
||||
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
|
||||
test_evaluator = val_evaluator
|
||||
@@ -0,0 +1,29 @@
|
||||
_base_ = './cityscapes.py'
|
||||
crop_size = (768, 768)
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(
|
||||
type='RandomResize',
|
||||
scale=(2049, 1025),
|
||||
ratio_range=(0.5, 2.0),
|
||||
keep_ratio=True),
|
||||
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
|
||||
dict(type='RandomFlip', prob=0.5),
|
||||
dict(type='PhotoMetricDistortion'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='Resize', scale=(2049, 1025), keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
|
||||
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
|
||||
test_evaluator = val_evaluator
|
||||
@@ -0,0 +1,29 @@
|
||||
_base_ = './cityscapes.py'
|
||||
crop_size = (769, 769)
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(
|
||||
type='RandomResize',
|
||||
scale=(2049, 1025),
|
||||
ratio_range=(0.5, 2.0),
|
||||
keep_ratio=True),
|
||||
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
|
||||
dict(type='RandomFlip', prob=0.5),
|
||||
dict(type='PhotoMetricDistortion'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='Resize', scale=(2049, 1025), keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
|
||||
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
|
||||
test_evaluator = val_evaluator
|
||||
@@ -0,0 +1,29 @@
|
||||
_base_ = './cityscapes.py'
|
||||
crop_size = (832, 832)
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(
|
||||
type='RandomResize',
|
||||
scale=(2048, 1024),
|
||||
ratio_range=(0.5, 2.0),
|
||||
keep_ratio=True),
|
||||
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
|
||||
dict(type='RandomFlip', prob=0.5),
|
||||
dict(type='PhotoMetricDistortion'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='Resize', scale=(2048, 1024), keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
|
||||
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
|
||||
test_evaluator = val_evaluator
|
||||
@@ -0,0 +1,69 @@
|
||||
# dataset settings
|
||||
dataset_type = 'COCOStuffDataset'
|
||||
data_root = 'data/coco_stuff10k'
|
||||
crop_size = (512, 512)
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='LoadAnnotations', reduce_zero_label=True),
|
||||
dict(
|
||||
type='RandomResize',
|
||||
scale=(2048, 512),
|
||||
ratio_range=(0.5, 2.0),
|
||||
keep_ratio=True),
|
||||
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
|
||||
dict(type='RandomFlip', prob=0.5),
|
||||
dict(type='PhotoMetricDistortion'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='Resize', scale=(2048, 512), keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
dict(type='LoadAnnotations', reduce_zero_label=True),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
|
||||
tta_pipeline = [
|
||||
dict(type='LoadImageFromFile', backend_args=None),
|
||||
dict(
|
||||
type='TestTimeAug',
|
||||
transforms=[
|
||||
[
|
||||
dict(type='Resize', scale_factor=r, keep_ratio=True)
|
||||
for r in img_ratios
|
||||
],
|
||||
[
|
||||
dict(type='RandomFlip', prob=0., direction='horizontal'),
|
||||
dict(type='RandomFlip', prob=1., direction='horizontal')
|
||||
], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
|
||||
])
|
||||
]
|
||||
train_dataloader = dict(
|
||||
batch_size=4,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='InfiniteSampler', shuffle=True),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
reduce_zero_label=True,
|
||||
data_prefix=dict(
|
||||
img_path='images/train2014', seg_map_path='annotations/train2014'),
|
||||
pipeline=train_pipeline))
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
reduce_zero_label=True,
|
||||
data_prefix=dict(
|
||||
img_path='images/test2014', seg_map_path='annotations/test2014'),
|
||||
pipeline=test_pipeline))
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
|
||||
test_evaluator = val_evaluator
|
||||
@@ -0,0 +1,67 @@
|
||||
# dataset settings
|
||||
dataset_type = 'COCOStuffDataset'
|
||||
data_root = 'data/coco_stuff164k'
|
||||
crop_size = (512, 512)
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(
|
||||
type='RandomResize',
|
||||
scale=(2048, 512),
|
||||
ratio_range=(0.5, 2.0),
|
||||
keep_ratio=True),
|
||||
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
|
||||
dict(type='RandomFlip', prob=0.5),
|
||||
dict(type='PhotoMetricDistortion'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='Resize', scale=(2048, 512), keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
|
||||
tta_pipeline = [
|
||||
dict(type='LoadImageFromFile', backend_args=None),
|
||||
dict(
|
||||
type='TestTimeAug',
|
||||
transforms=[
|
||||
[
|
||||
dict(type='Resize', scale_factor=r, keep_ratio=True)
|
||||
for r in img_ratios
|
||||
],
|
||||
[
|
||||
dict(type='RandomFlip', prob=0., direction='horizontal'),
|
||||
dict(type='RandomFlip', prob=1., direction='horizontal')
|
||||
], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
|
||||
])
|
||||
]
|
||||
train_dataloader = dict(
|
||||
batch_size=4,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='InfiniteSampler', shuffle=True),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(
|
||||
img_path='images/train2017', seg_map_path='annotations/train2017'),
|
||||
pipeline=train_pipeline))
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(
|
||||
img_path='images/val2017', seg_map_path='annotations/val2017'),
|
||||
pipeline=test_pipeline))
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
|
||||
test_evaluator = val_evaluator
|
||||
73
Seg_All_In_One_MMSeg/configs/_base_/datasets/drive.py
Normal file
73
Seg_All_In_One_MMSeg/configs/_base_/datasets/drive.py
Normal file
@@ -0,0 +1,73 @@
|
||||
# dataset settings
|
||||
dataset_type = 'DRIVEDataset'
|
||||
data_root = 'data/DRIVE'
|
||||
img_scale = (584, 565)
|
||||
crop_size = (64, 64)
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(
|
||||
type='RandomResize',
|
||||
scale=img_scale,
|
||||
ratio_range=(0.5, 2.0),
|
||||
keep_ratio=True),
|
||||
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
|
||||
dict(type='RandomFlip', prob=0.5),
|
||||
dict(type='PhotoMetricDistortion'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='Resize', scale=img_scale, keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
|
||||
tta_pipeline = [
|
||||
dict(type='LoadImageFromFile', backend_args=None),
|
||||
dict(
|
||||
type='TestTimeAug',
|
||||
transforms=[
|
||||
[
|
||||
dict(type='Resize', scale_factor=r, keep_ratio=True)
|
||||
for r in img_ratios
|
||||
],
|
||||
[
|
||||
dict(type='RandomFlip', prob=0., direction='horizontal'),
|
||||
dict(type='RandomFlip', prob=1., direction='horizontal')
|
||||
], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
|
||||
])
|
||||
]
|
||||
train_dataloader = dict(
|
||||
batch_size=4,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='InfiniteSampler', shuffle=True),
|
||||
dataset=dict(
|
||||
type='RepeatDataset',
|
||||
times=40000,
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(
|
||||
img_path='images/training',
|
||||
seg_map_path='annotations/training'),
|
||||
pipeline=train_pipeline)))
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(
|
||||
img_path='images/validation',
|
||||
seg_map_path='annotations/validation'),
|
||||
pipeline=test_pipeline))
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
val_evaluator = dict(type='IoUMetric', iou_metrics=['mDice'])
|
||||
test_evaluator = val_evaluator
|
||||
73
Seg_All_In_One_MMSeg/configs/_base_/datasets/hrf.py
Normal file
73
Seg_All_In_One_MMSeg/configs/_base_/datasets/hrf.py
Normal file
@@ -0,0 +1,73 @@
|
||||
# dataset settings
|
||||
dataset_type = 'HRFDataset'
|
||||
data_root = 'data/HRF'
|
||||
img_scale = (2336, 3504)
|
||||
crop_size = (256, 256)
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(
|
||||
type='RandomResize',
|
||||
scale=img_scale,
|
||||
ratio_range=(0.5, 2.0),
|
||||
keep_ratio=True),
|
||||
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
|
||||
dict(type='RandomFlip', prob=0.5),
|
||||
dict(type='PhotoMetricDistortion'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='Resize', scale=img_scale, keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
|
||||
tta_pipeline = [
|
||||
dict(type='LoadImageFromFile', backend_args=None),
|
||||
dict(
|
||||
type='TestTimeAug',
|
||||
transforms=[
|
||||
[
|
||||
dict(type='Resize', scale_factor=r, keep_ratio=True)
|
||||
for r in img_ratios
|
||||
],
|
||||
[
|
||||
dict(type='RandomFlip', prob=0., direction='horizontal'),
|
||||
dict(type='RandomFlip', prob=1., direction='horizontal')
|
||||
], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
|
||||
])
|
||||
]
|
||||
train_dataloader = dict(
|
||||
batch_size=4,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='InfiniteSampler', shuffle=True),
|
||||
dataset=dict(
|
||||
type='RepeatDataset',
|
||||
times=40000,
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(
|
||||
img_path='images/training',
|
||||
seg_map_path='annotations/training'),
|
||||
pipeline=train_pipeline)))
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(
|
||||
img_path='images/validation',
|
||||
seg_map_path='annotations/validation'),
|
||||
pipeline=test_pipeline))
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
val_evaluator = dict(type='IoUMetric', iou_metrics=['mDice'])
|
||||
test_evaluator = val_evaluator
|
||||
53
Seg_All_In_One_MMSeg/configs/_base_/datasets/hsi_drive.py
Normal file
53
Seg_All_In_One_MMSeg/configs/_base_/datasets/hsi_drive.py
Normal file
@@ -0,0 +1,53 @@
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromNpyFile'),
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(type='RandomCrop', crop_size=(192, 384)),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromNpyFile'),
|
||||
dict(type='RandomCrop', crop_size=(192, 384)),
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=4,
|
||||
num_workers=1,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='InfiniteSampler', shuffle=True),
|
||||
dataset=dict(
|
||||
type='HSIDrive20Dataset',
|
||||
data_root='data/HSIDrive20',
|
||||
data_prefix=dict(
|
||||
img_path='images/training', seg_map_path='annotations/training'),
|
||||
pipeline=train_pipeline))
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=1,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type='HSIDrive20Dataset',
|
||||
data_root='data/HSIDrive20',
|
||||
data_prefix=dict(
|
||||
img_path='images/validation',
|
||||
seg_map_path='annotations/validation'),
|
||||
pipeline=test_pipeline))
|
||||
|
||||
test_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=1,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type='HSIDrive20Dataset',
|
||||
data_root='data/HSIDrive20',
|
||||
data_prefix=dict(
|
||||
img_path='images/test', seg_map_path='annotations/test'),
|
||||
pipeline=test_pipeline))
|
||||
|
||||
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'], ignore_index=0)
|
||||
test_evaluator = val_evaluator
|
||||
73
Seg_All_In_One_MMSeg/configs/_base_/datasets/isaid.py
Normal file
73
Seg_All_In_One_MMSeg/configs/_base_/datasets/isaid.py
Normal file
@@ -0,0 +1,73 @@
|
||||
# dataset settings
|
||||
dataset_type = 'iSAIDDataset'
|
||||
data_root = 'data/iSAID'
|
||||
"""
|
||||
This crop_size setting is followed by the implementation of
|
||||
`PointFlow: Flowing Semantics Through Points for Aerial Image
|
||||
Segmentation <https://arxiv.org/pdf/2103.06564.pdf>`_.
|
||||
"""
|
||||
|
||||
crop_size = (896, 896)
|
||||
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(
|
||||
type='RandomResize',
|
||||
scale=(896, 896),
|
||||
ratio_range=(0.5, 2.0),
|
||||
keep_ratio=True),
|
||||
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
|
||||
dict(type='RandomFlip', prob=0.5),
|
||||
dict(type='PhotoMetricDistortion'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='Resize', scale=(896, 896), keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
|
||||
tta_pipeline = [
|
||||
dict(type='LoadImageFromFile', backend_args=None),
|
||||
dict(
|
||||
type='TestTimeAug',
|
||||
transforms=[
|
||||
[
|
||||
dict(type='Resize', scale_factor=r, keep_ratio=True)
|
||||
for r in img_ratios
|
||||
],
|
||||
[
|
||||
dict(type='RandomFlip', prob=0., direction='horizontal'),
|
||||
dict(type='RandomFlip', prob=1., direction='horizontal')
|
||||
], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
|
||||
])
|
||||
]
|
||||
train_dataloader = dict(
|
||||
batch_size=4,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='InfiniteSampler', shuffle=True),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(
|
||||
img_path='img_dir/train', seg_map_path='ann_dir/train'),
|
||||
pipeline=train_pipeline))
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'),
|
||||
pipeline=test_pipeline))
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
|
||||
test_evaluator = val_evaluator
|
||||
@@ -0,0 +1,68 @@
|
||||
# dataset settings
|
||||
dataset_type = 'LEVIRCDDataset'
|
||||
data_root = r'data/LEVIRCD'
|
||||
|
||||
albu_train_transforms = [
|
||||
dict(type='RandomBrightnessContrast', p=0.2),
|
||||
dict(type='HorizontalFlip', p=0.5),
|
||||
dict(type='VerticalFlip', p=0.5)
|
||||
]
|
||||
|
||||
train_pipeline = [
|
||||
dict(type='LoadMultipleRSImageFromFile'),
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(
|
||||
type='Albu',
|
||||
keymap={
|
||||
'img': 'image',
|
||||
'img2': 'image2',
|
||||
'gt_seg_map': 'mask'
|
||||
},
|
||||
transforms=albu_train_transforms,
|
||||
additional_targets={'image2': 'image'},
|
||||
bgr_to_rgb=False),
|
||||
dict(type='ConcatCDInput'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadMultipleRSImageFromFile'),
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(type='ConcatCDInput'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
|
||||
tta_pipeline = [
|
||||
dict(type='LoadMultipleRSImageFromFile'),
|
||||
dict(
|
||||
type='TestTimeAug',
|
||||
transforms=[[dict(type='LoadAnnotations')],
|
||||
[dict(type='ConcatCDInput')],
|
||||
[dict(type='PackSegInputs')]])
|
||||
]
|
||||
train_dataloader = dict(
|
||||
batch_size=4,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='InfiniteSampler', shuffle=True),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(
|
||||
img_path='train/A',
|
||||
img_path2='train/B',
|
||||
seg_map_path='train/label'),
|
||||
pipeline=train_pipeline))
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(
|
||||
img_path='test/A', img_path2='test/B', seg_map_path='test/label'),
|
||||
pipeline=test_pipeline))
|
||||
test_dataloader = val_dataloader
|
||||
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
|
||||
test_evaluator = val_evaluator
|
||||
66
Seg_All_In_One_MMSeg/configs/_base_/datasets/loveda.py
Normal file
66
Seg_All_In_One_MMSeg/configs/_base_/datasets/loveda.py
Normal file
@@ -0,0 +1,66 @@
|
||||
# dataset settings
|
||||
dataset_type = 'LoveDADataset'
|
||||
data_root = 'data/loveDA'
|
||||
crop_size = (512, 512)
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='LoadAnnotations', reduce_zero_label=True),
|
||||
dict(
|
||||
type='RandomResize',
|
||||
scale=(2048, 512),
|
||||
ratio_range=(0.5, 2.0),
|
||||
keep_ratio=True),
|
||||
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
|
||||
dict(type='RandomFlip', prob=0.5),
|
||||
dict(type='PhotoMetricDistortion'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='Resize', scale=(1024, 1024), keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
dict(type='LoadAnnotations', reduce_zero_label=True),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
|
||||
tta_pipeline = [
|
||||
dict(type='LoadImageFromFile', backend_args=None),
|
||||
dict(
|
||||
type='TestTimeAug',
|
||||
transforms=[
|
||||
[
|
||||
dict(type='Resize', scale_factor=r, keep_ratio=True)
|
||||
for r in img_ratios
|
||||
],
|
||||
[
|
||||
dict(type='RandomFlip', prob=0., direction='horizontal'),
|
||||
dict(type='RandomFlip', prob=1., direction='horizontal')
|
||||
], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
|
||||
])
|
||||
]
|
||||
train_dataloader = dict(
|
||||
batch_size=4,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='InfiniteSampler', shuffle=True),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(
|
||||
img_path='img_dir/train', seg_map_path='ann_dir/train'),
|
||||
pipeline=train_pipeline))
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'),
|
||||
pipeline=test_pipeline))
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
|
||||
test_evaluator = val_evaluator
|
||||
68
Seg_All_In_One_MMSeg/configs/_base_/datasets/mapillary_v1.py
Normal file
68
Seg_All_In_One_MMSeg/configs/_base_/datasets/mapillary_v1.py
Normal file
@@ -0,0 +1,68 @@
|
||||
# dataset settings
|
||||
dataset_type = 'MapillaryDataset_v1'
|
||||
data_root = 'data/mapillary/'
|
||||
crop_size = (512, 1024)
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(
|
||||
type='RandomResize',
|
||||
scale=(2048, 1024),
|
||||
ratio_range=(0.5, 2.0),
|
||||
keep_ratio=True),
|
||||
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
|
||||
dict(type='RandomFlip', prob=0.5),
|
||||
dict(type='PhotoMetricDistortion'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='Resize', scale=(2048, 1024), keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
|
||||
tta_pipeline = [
|
||||
dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
|
||||
dict(
|
||||
type='TestTimeAug',
|
||||
transforms=[
|
||||
[
|
||||
dict(type='Resize', scale_factor=r, keep_ratio=True)
|
||||
for r in img_ratios
|
||||
],
|
||||
[
|
||||
dict(type='RandomFlip', prob=0., direction='horizontal'),
|
||||
dict(type='RandomFlip', prob=1., direction='horizontal')
|
||||
], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
|
||||
])
|
||||
]
|
||||
train_dataloader = dict(
|
||||
batch_size=2,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='InfiniteSampler', shuffle=True),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(
|
||||
img_path='training/images', seg_map_path='training/v1.2/labels'),
|
||||
pipeline=train_pipeline))
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(
|
||||
img_path='validation/images',
|
||||
seg_map_path='validation/v1.2/labels'),
|
||||
pipeline=test_pipeline))
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
|
||||
test_evaluator = val_evaluator
|
||||
@@ -0,0 +1,37 @@
|
||||
# dataset settings
|
||||
_base_ = './mapillary_v1.py'
|
||||
metainfo = dict(
|
||||
classes=('Bird', 'Ground Animal', 'Curb', 'Fence', 'Guard Rail', 'Barrier',
|
||||
'Wall', 'Bike Lane', 'Crosswalk - Plain', 'Curb Cut', 'Parking',
|
||||
'Pedestrian Area', 'Rail Track', 'Road', 'Service Lane',
|
||||
'Sidewalk', 'Bridge', 'Building', 'Tunnel', 'Person', 'Bicyclist',
|
||||
'Motorcyclist', 'Other Rider', 'Lane Marking - Crosswalk',
|
||||
'Lane Marking - General', 'Mountain', 'Sand', 'Sky', 'Snow',
|
||||
'Terrain', 'Vegetation', 'Water', 'Banner', 'Bench', 'Bike Rack',
|
||||
'Billboard', 'Catch Basin', 'CCTV Camera', 'Fire Hydrant',
|
||||
'Junction Box', 'Mailbox', 'Manhole', 'Phone Booth', 'Pothole',
|
||||
'Street Light', 'Pole', 'Traffic Sign Frame', 'Utility Pole',
|
||||
'Traffic Light', 'Traffic Sign (Back)', 'Traffic Sign (Front)',
|
||||
'Trash Can', 'Bicycle', 'Boat', 'Bus', 'Car', 'Caravan',
|
||||
'Motorcycle', 'On Rails', 'Other Vehicle', 'Trailer', 'Truck',
|
||||
'Wheeled Slow', 'Car Mount', 'Ego Vehicle'),
|
||||
palette=[[165, 42, 42], [0, 192, 0], [196, 196, 196], [190, 153, 153],
|
||||
[180, 165, 180], [90, 120, 150], [102, 102, 156], [128, 64, 255],
|
||||
[140, 140, 200], [170, 170, 170], [250, 170, 160], [96, 96, 96],
|
||||
[230, 150, 140], [128, 64, 128], [110, 110, 110], [244, 35, 232],
|
||||
[150, 100, 100], [70, 70, 70], [150, 120, 90], [220, 20, 60],
|
||||
[255, 0, 0], [255, 0, 100], [255, 0, 200], [200, 128, 128],
|
||||
[255, 255, 255], [64, 170, 64], [230, 160, 50], [70, 130, 180],
|
||||
[190, 255, 255], [152, 251, 152], [107, 142, 35], [0, 170, 30],
|
||||
[255, 255, 128], [250, 0, 30], [100, 140, 180], [220, 220, 220],
|
||||
[220, 128, 128], [222, 40, 40], [100, 170, 30], [40, 40, 40],
|
||||
[33, 33, 33], [100, 128, 160], [142, 0, 0], [70, 100, 150],
|
||||
[210, 170, 100], [153, 153, 153], [128, 128, 128], [0, 0, 80],
|
||||
[250, 170, 30], [192, 192, 192], [220, 220, 0], [140, 140, 20],
|
||||
[119, 11, 32], [150, 0, 255], [0, 60, 100], [0, 0, 142],
|
||||
[0, 0, 90], [0, 0, 230], [0, 80, 100], [128, 64, 64], [0, 0, 110],
|
||||
[0, 0, 70], [0, 0, 192], [32, 32, 32], [120, 10, 10]])
|
||||
|
||||
train_dataloader = dict(dataset=dict(metainfo=metainfo))
|
||||
val_dataloader = dict(dataset=dict(metainfo=metainfo))
|
||||
test_dataloader = val_dataloader
|
||||
68
Seg_All_In_One_MMSeg/configs/_base_/datasets/mapillary_v2.py
Normal file
68
Seg_All_In_One_MMSeg/configs/_base_/datasets/mapillary_v2.py
Normal file
@@ -0,0 +1,68 @@
|
||||
# dataset settings
|
||||
dataset_type = 'MapillaryDataset_v2'
|
||||
data_root = 'data/mapillary/'
|
||||
crop_size = (512, 1024)
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(
|
||||
type='RandomResize',
|
||||
scale=(2048, 1024),
|
||||
ratio_range=(0.5, 2.0),
|
||||
keep_ratio=True),
|
||||
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
|
||||
dict(type='RandomFlip', prob=0.5),
|
||||
dict(type='PhotoMetricDistortion'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='Resize', scale=(2048, 1024), keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
|
||||
tta_pipeline = [
|
||||
dict(type='LoadImageFromFile', file_client_args=dict(backend='disk')),
|
||||
dict(
|
||||
type='TestTimeAug',
|
||||
transforms=[
|
||||
[
|
||||
dict(type='Resize', scale_factor=r, keep_ratio=True)
|
||||
for r in img_ratios
|
||||
],
|
||||
[
|
||||
dict(type='RandomFlip', prob=0., direction='horizontal'),
|
||||
dict(type='RandomFlip', prob=1., direction='horizontal')
|
||||
], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
|
||||
])
|
||||
]
|
||||
train_dataloader = dict(
|
||||
batch_size=2,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='InfiniteSampler', shuffle=True),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(
|
||||
img_path='training/images', seg_map_path='training/v2.0/labels'),
|
||||
pipeline=train_pipeline))
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(
|
||||
img_path='validation/images',
|
||||
seg_map_path='validation/v2.0/labels'),
|
||||
pipeline=test_pipeline))
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
|
||||
test_evaluator = val_evaluator
|
||||
64
Seg_All_In_One_MMSeg/configs/_base_/datasets/my_dataset.py
Normal file
64
Seg_All_In_One_MMSeg/configs/_base_/datasets/my_dataset.py
Normal file
@@ -0,0 +1,64 @@
|
||||
# dataset settings
|
||||
dataset_class_name = 'MyDataset' # TODO 上一步中你定义的数据集的名字
|
||||
data_root = '/home/audience/Desktop/Seg_data/Data' # TODO 数据集存储路径
|
||||
# img_norm_cfg = dict(
|
||||
# mean=[33.30, 35.03, 47.23], std=[48.00, 50.4, 60.51], to_rgb=True) # TODO 数据集的均值和标准差,空引用默认的,也可以网上搜代码计算
|
||||
img_scale = (1920, 1080) # img_scale图像尺寸 TODO (1920,1080)
|
||||
crop_size = (512, 512) # 数据增强时裁剪的大小 TODO 之后可以修改
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='LoadAnnotations'), # ", reduce_zero_label=False" TODO 是否忽略0直选项
|
||||
dict(type='RandomResize', scale=img_scale, ratio_range=(0.5, 2.0)),
|
||||
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
|
||||
dict(type='RandomFlip', prob=0.5),
|
||||
dict(type='PhotoMetricDistortion'),
|
||||
# dict(type='GenerateEdge', edge_width=4), # For pidnet
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='Resize', scale=img_scale, keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
train_dataloader = dict( # Train dataloader config
|
||||
batch_size=4, # Batch size of a single GPU TODO
|
||||
num_workers=4, # Worker to pre-fetch data for each single GPU
|
||||
persistent_workers=True, # Shut down the worker processes after an epoch end, which can accelerate training speed.
|
||||
sampler=dict(type='DefaultSampler', shuffle=True), # Randomly shuffle during training.
|
||||
dataset=dict( # Train dataset config
|
||||
type=dataset_class_name, # Type of dataset, refer to mmseg/datasets/ for details.
|
||||
data_root=data_root, # The root of dataset.
|
||||
data_prefix=dict(
|
||||
img_path='A_Ori',
|
||||
seg_map_path='A_Label_GT_label_fold'),
|
||||
pipeline=train_pipeline)) # Processing pipeline. This is passed by the train_pipeline created before.
|
||||
val_dataloader = dict(
|
||||
batch_size=1, # Batch size of a single GPU
|
||||
num_workers=4, # Worker to pre-fetch data for each single GPU
|
||||
persistent_workers=True, # Shut down the worker processes after an epoch end, which can accelerate testing speed.
|
||||
sampler=dict(type='DefaultSampler', shuffle=False), # Not shuffle during validation and testing.
|
||||
dataset=dict( # Test dataset config
|
||||
type=dataset_class_name, # Type of dataset, refer to mmseg/datasets/ for details.
|
||||
data_root=data_root, # The root of dataset.
|
||||
data_prefix=dict(
|
||||
img_path='A_Ori',
|
||||
seg_map_path='A_Label_GT_label_fold'),
|
||||
pipeline=test_pipeline)) # Processing pipeline. This is passed by the test_pipeline created before.
|
||||
test_dataloader = dict(
|
||||
batch_size=1, # Batch size of a single GPU
|
||||
num_workers=4, # Worker to pre-fetch data for each single GPU
|
||||
persistent_workers=True, # Shut down the worker processes after an epoch end, which can accelerate testing speed.
|
||||
sampler=dict(type='DefaultSampler', shuffle=False), # Not shuffle during validation and testing.
|
||||
dataset=dict( # Test dataset config
|
||||
type=dataset_class_name, # Type of dataset, refer to mmseg/datasets/ for details.
|
||||
data_root=data_root, # The root of dataset.
|
||||
data_prefix=dict(
|
||||
img_path='A_Ori',
|
||||
seg_map_path='A_Label_GT_label_fold'),
|
||||
pipeline=test_pipeline)) # Processing pipeline. This is passed by the test_pipeline created before.
|
||||
# The metric to measure the accuracy. Here, we use IoUMetric.
|
||||
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
|
||||
test_evaluator = val_evaluator
|
||||
@@ -0,0 +1,64 @@
|
||||
# dataset settings
|
||||
dataset_class_name = 'MyDataset_model' # TODO 上一步中你定义的数据集的名字
|
||||
data_root = '/home/wkmgc/Desktop/Seg/Seg_All_In_One_MMSeg/My_Data' # TODO 数据集存储路径
|
||||
# img_norm_cfg = dict(
|
||||
# mean=[33.30, 35.03, 47.23], std=[48.00, 50.4, 60.51], to_rgb=True) # TODO 数据集的均值和标准差,空引用默认的,也可以网上搜代码计算
|
||||
img_scale = (1920, 1080) # img_scale图像尺寸 TODO (1920,1080)
|
||||
crop_size = (256, 256) # 数据增强时裁剪的大小 TODO 之后可以修改
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='LoadAnnotations'), # ", reduce_zero_label=False" TODO 是否忽略0直选项
|
||||
dict(type='RandomResize', scale=img_scale, ratio_range=(0.5, 2.0)),
|
||||
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
|
||||
dict(type='RandomFlip', prob=0.5),
|
||||
dict(type='PhotoMetricDistortion'),
|
||||
# dict(type='GenerateEdge', edge_width=4), # For pidnet
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='Resize', scale=img_scale, keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
train_dataloader = dict( # Train dataloader config
|
||||
batch_size=16, # Batch size of a single GPU TODO
|
||||
num_workers=4, # Worker to pre-fetch data for each single GPU
|
||||
persistent_workers=True, # Shut down the worker processes after an epoch end, which can accelerate training speed.
|
||||
sampler=dict(type='DefaultSampler', shuffle=True), # Randomly shuffle during training.
|
||||
dataset=dict( # Train dataset config
|
||||
type=dataset_class_name, # Type of dataset, refer to mmseg/datasets/ for details.
|
||||
data_root=data_root, # The root of dataset.
|
||||
data_prefix=dict(
|
||||
img_path='A_Ori',
|
||||
seg_map_path='A_Label_GT_label_fold'),
|
||||
pipeline=train_pipeline)) # Processing pipeline. This is passed by the train_pipeline created before.
|
||||
val_dataloader = dict(
|
||||
batch_size=1, # Batch size of a single GPU
|
||||
num_workers=4, # Worker to pre-fetch data for each single GPU
|
||||
persistent_workers=True, # Shut down the worker processes after an epoch end, which can accelerate testing speed.
|
||||
sampler=dict(type='DefaultSampler', shuffle=False), # Not shuffle during validation and testing.
|
||||
dataset=dict( # Test dataset config
|
||||
type=dataset_class_name, # Type of dataset, refer to mmseg/datasets/ for details.
|
||||
data_root=data_root, # The root of dataset.
|
||||
data_prefix=dict(
|
||||
img_path='A_Ori',
|
||||
seg_map_path='A_Label_GT_label_fold'),
|
||||
pipeline=test_pipeline)) # Processing pipeline. This is passed by the test_pipeline created before.
|
||||
test_dataloader = dict(
|
||||
batch_size=1, # Batch size of a single GPU
|
||||
num_workers=4, # Worker to pre-fetch data for each single GPU
|
||||
persistent_workers=True, # Shut down the worker processes after an epoch end, which can accelerate testing speed.
|
||||
sampler=dict(type='DefaultSampler', shuffle=False), # Not shuffle during validation and testing.
|
||||
dataset=dict( # Test dataset config
|
||||
type=dataset_class_name, # Type of dataset, refer to mmseg/datasets/ for details.
|
||||
data_root=data_root, # The root of dataset.
|
||||
data_prefix=dict(
|
||||
img_path='A_Ori',
|
||||
seg_map_path='A_Label_GT_label_fold'),
|
||||
pipeline=test_pipeline)) # Processing pipeline. This is passed by the test_pipeline created before.
|
||||
# The metric to measure the accuracy. Here, we use IoUMetric.
|
||||
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
|
||||
test_evaluator = val_evaluator
|
||||
67
Seg_All_In_One_MMSeg/configs/_base_/datasets/nyu.py
Normal file
67
Seg_All_In_One_MMSeg/configs/_base_/datasets/nyu.py
Normal file
@@ -0,0 +1,67 @@
|
||||
# dataset settings
|
||||
dataset_type = 'NYUDataset'
|
||||
data_root = 'data/nyu'
|
||||
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='LoadDepthAnnotation', depth_rescale_factor=1e-3),
|
||||
dict(type='RandomDepthMix', prob=0.25),
|
||||
dict(type='RandomFlip', prob=0.5),
|
||||
dict(type='RandomCrop', crop_size=(480, 480)),
|
||||
dict(
|
||||
type='Albu',
|
||||
transforms=[
|
||||
dict(type='RandomBrightnessContrast'),
|
||||
dict(type='RandomGamma'),
|
||||
dict(type='HueSaturationValue'),
|
||||
]),
|
||||
dict(
|
||||
type='PackSegInputs',
|
||||
meta_keys=('img_path', 'depth_map_path', 'ori_shape', 'img_shape',
|
||||
'pad_shape', 'scale_factor', 'flip', 'flip_direction',
|
||||
'category_id')),
|
||||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='Resize', scale=(2000, 480), keep_ratio=True),
|
||||
dict(dict(type='LoadDepthAnnotation', depth_rescale_factor=1e-3)),
|
||||
dict(
|
||||
type='PackSegInputs',
|
||||
meta_keys=('img_path', 'depth_map_path', 'ori_shape', 'img_shape',
|
||||
'pad_shape', 'scale_factor', 'flip', 'flip_direction',
|
||||
'category_id'))
|
||||
]
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=8,
|
||||
num_workers=8,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='InfiniteSampler', shuffle=True),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(
|
||||
img_path='images/train', depth_map_path='annotations/train'),
|
||||
pipeline=train_pipeline))
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
test_mode=True,
|
||||
data_prefix=dict(
|
||||
img_path='images/test', depth_map_path='annotations/test'),
|
||||
pipeline=test_pipeline))
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
val_evaluator = dict(
|
||||
type='DepthMetric',
|
||||
min_depth_eval=0.001,
|
||||
max_depth_eval=10.0,
|
||||
crop_type='nyu_crop')
|
||||
test_evaluator = val_evaluator
|
||||
72
Seg_All_In_One_MMSeg/configs/_base_/datasets/nyu_512x512.py
Normal file
72
Seg_All_In_One_MMSeg/configs/_base_/datasets/nyu_512x512.py
Normal file
@@ -0,0 +1,72 @@
|
||||
# dataset settings
|
||||
dataset_type = 'NYUDataset'
|
||||
data_root = 'data/nyu'
|
||||
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='LoadDepthAnnotation', depth_rescale_factor=1e-3),
|
||||
dict(type='RandomDepthMix', prob=0.25),
|
||||
dict(type='RandomFlip', prob=0.5),
|
||||
dict(
|
||||
type='RandomResize',
|
||||
scale=(768, 512),
|
||||
ratio_range=(0.8, 1.5),
|
||||
keep_ratio=True),
|
||||
dict(type='RandomCrop', crop_size=(512, 512)),
|
||||
dict(
|
||||
type='Albu',
|
||||
transforms=[
|
||||
dict(type='RandomBrightnessContrast'),
|
||||
dict(type='RandomGamma'),
|
||||
dict(type='HueSaturationValue'),
|
||||
]),
|
||||
dict(
|
||||
type='PackSegInputs',
|
||||
meta_keys=('img_path', 'depth_map_path', 'ori_shape', 'img_shape',
|
||||
'pad_shape', 'scale_factor', 'flip', 'flip_direction',
|
||||
'category_id')),
|
||||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='Resize', scale=(2048, 512), keep_ratio=True),
|
||||
dict(dict(type='LoadDepthAnnotation', depth_rescale_factor=1e-3)),
|
||||
dict(
|
||||
type='PackSegInputs',
|
||||
meta_keys=('img_path', 'depth_map_path', 'ori_shape', 'img_shape',
|
||||
'pad_shape', 'scale_factor', 'flip', 'flip_direction',
|
||||
'category_id'))
|
||||
]
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=8,
|
||||
num_workers=8,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='InfiniteSampler', shuffle=True),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(
|
||||
img_path='images/train', depth_map_path='annotations/train'),
|
||||
pipeline=train_pipeline))
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
test_mode=True,
|
||||
data_prefix=dict(
|
||||
img_path='images/test', depth_map_path='annotations/test'),
|
||||
pipeline=test_pipeline))
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
val_evaluator = dict(
|
||||
type='DepthMetric',
|
||||
min_depth_eval=0.001,
|
||||
max_depth_eval=10.0,
|
||||
crop_type='nyu_crop')
|
||||
test_evaluator = val_evaluator
|
||||
@@ -0,0 +1,56 @@
|
||||
# dataset settings
|
||||
dataset_type = 'PascalContextDataset'
|
||||
data_root = 'data/VOCdevkit/VOC2010/'
|
||||
|
||||
img_scale = (520, 520)
|
||||
crop_size = (480, 480)
|
||||
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(
|
||||
type='RandomResize',
|
||||
scale=img_scale,
|
||||
ratio_range=(0.5, 2.0),
|
||||
keep_ratio=True),
|
||||
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
|
||||
dict(type='RandomFlip', prob=0.5),
|
||||
dict(type='PhotoMetricDistortion'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='Resize', scale=img_scale, keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
train_dataloader = dict(
|
||||
batch_size=4,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='InfiniteSampler', shuffle=True),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(
|
||||
img_path='JPEGImages', seg_map_path='SegmentationClassContext'),
|
||||
ann_file='ImageSets/SegmentationContext/train.txt',
|
||||
pipeline=train_pipeline))
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(
|
||||
img_path='JPEGImages', seg_map_path='SegmentationClassContext'),
|
||||
ann_file='ImageSets/SegmentationContext/val.txt',
|
||||
pipeline=test_pipeline))
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
|
||||
test_evaluator = val_evaluator
|
||||
@@ -0,0 +1,72 @@
|
||||
# dataset settings
|
||||
dataset_type = 'PascalContextDataset59'
|
||||
data_root = 'data/VOCdevkit/VOC2010/'
|
||||
|
||||
img_scale = (520, 520)
|
||||
crop_size = (480, 480)
|
||||
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='LoadAnnotations', reduce_zero_label=True),
|
||||
dict(
|
||||
type='RandomResize',
|
||||
scale=img_scale,
|
||||
ratio_range=(0.5, 2.0),
|
||||
keep_ratio=True),
|
||||
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
|
||||
dict(type='RandomFlip', prob=0.5),
|
||||
dict(type='PhotoMetricDistortion'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='Resize', scale=img_scale, keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
dict(type='LoadAnnotations', reduce_zero_label=True),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
|
||||
tta_pipeline = [
|
||||
dict(type='LoadImageFromFile', backend_args=None),
|
||||
dict(
|
||||
type='TestTimeAug',
|
||||
transforms=[
|
||||
[
|
||||
dict(type='Resize', scale_factor=r, keep_ratio=True)
|
||||
for r in img_ratios
|
||||
],
|
||||
[
|
||||
dict(type='RandomFlip', prob=0., direction='horizontal'),
|
||||
dict(type='RandomFlip', prob=1., direction='horizontal')
|
||||
], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
|
||||
])
|
||||
]
|
||||
train_dataloader = dict(
|
||||
batch_size=4,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='InfiniteSampler', shuffle=True),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(
|
||||
img_path='JPEGImages', seg_map_path='SegmentationClassContext'),
|
||||
ann_file='ImageSets/SegmentationContext/train.txt',
|
||||
pipeline=train_pipeline))
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(
|
||||
img_path='JPEGImages', seg_map_path='SegmentationClassContext'),
|
||||
ann_file='ImageSets/SegmentationContext/val.txt',
|
||||
pipeline=test_pipeline))
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
|
||||
test_evaluator = val_evaluator
|
||||
69
Seg_All_In_One_MMSeg/configs/_base_/datasets/pascal_voc12.py
Normal file
69
Seg_All_In_One_MMSeg/configs/_base_/datasets/pascal_voc12.py
Normal file
@@ -0,0 +1,69 @@
|
||||
# dataset settings
|
||||
dataset_type = 'PascalVOCDataset'
|
||||
data_root = 'data/VOCdevkit/VOC2012'
|
||||
crop_size = (512, 512)
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(
|
||||
type='RandomResize',
|
||||
scale=(2048, 512),
|
||||
ratio_range=(0.5, 2.0),
|
||||
keep_ratio=True),
|
||||
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
|
||||
dict(type='RandomFlip', prob=0.5),
|
||||
dict(type='PhotoMetricDistortion'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='Resize', scale=(2048, 512), keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
|
||||
tta_pipeline = [
|
||||
dict(type='LoadImageFromFile', backend_args=None),
|
||||
dict(
|
||||
type='TestTimeAug',
|
||||
transforms=[
|
||||
[
|
||||
dict(type='Resize', scale_factor=r, keep_ratio=True)
|
||||
for r in img_ratios
|
||||
],
|
||||
[
|
||||
dict(type='RandomFlip', prob=0., direction='horizontal'),
|
||||
dict(type='RandomFlip', prob=1., direction='horizontal')
|
||||
], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
|
||||
])
|
||||
]
|
||||
train_dataloader = dict(
|
||||
batch_size=4,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='InfiniteSampler', shuffle=True),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(
|
||||
img_path='JPEGImages', seg_map_path='SegmentationClass'),
|
||||
ann_file='ImageSets/Segmentation/train.txt',
|
||||
pipeline=train_pipeline))
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(
|
||||
img_path='JPEGImages', seg_map_path='SegmentationClass'),
|
||||
ann_file='ImageSets/Segmentation/val.txt',
|
||||
pipeline=test_pipeline))
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
|
||||
test_evaluator = val_evaluator
|
||||
@@ -0,0 +1,81 @@
|
||||
# dataset settings
|
||||
dataset_type = 'PascalVOCDataset'
|
||||
data_root = 'data/VOCdevkit/VOC2012'
|
||||
crop_size = (512, 512)
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(
|
||||
type='RandomResize',
|
||||
scale=(2048, 512),
|
||||
ratio_range=(0.5, 2.0),
|
||||
keep_ratio=True),
|
||||
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
|
||||
dict(type='RandomFlip', prob=0.5),
|
||||
dict(type='PhotoMetricDistortion'),
|
||||
dict(type='Pad', size=crop_size),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='Resize', scale=(2048, 512), keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
|
||||
tta_pipeline = [
|
||||
dict(type='LoadImageFromFile', backend_args=None),
|
||||
dict(
|
||||
type='TestTimeAug',
|
||||
transforms=[
|
||||
[
|
||||
dict(type='Resize', scale_factor=r, keep_ratio=True)
|
||||
for r in img_ratios
|
||||
],
|
||||
[
|
||||
dict(type='RandomFlip', prob=0., direction='horizontal'),
|
||||
dict(type='RandomFlip', prob=1., direction='horizontal')
|
||||
], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
|
||||
])
|
||||
]
|
||||
dataset_train = dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(img_path='JPEGImages', seg_map_path='SegmentationClass'),
|
||||
ann_file='ImageSets/Segmentation/train.txt',
|
||||
pipeline=train_pipeline)
|
||||
|
||||
dataset_aug = dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(
|
||||
img_path='JPEGImages', seg_map_path='SegmentationClassAug'),
|
||||
ann_file='ImageSets/Segmentation/aug.txt',
|
||||
pipeline=train_pipeline)
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=4,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='InfiniteSampler', shuffle=True),
|
||||
dataset=dict(type='ConcatDataset', datasets=[dataset_train, dataset_aug]))
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(
|
||||
img_path='JPEGImages', seg_map_path='SegmentationClass'),
|
||||
ann_file='ImageSets/Segmentation/val.txt',
|
||||
pipeline=test_pipeline))
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
|
||||
test_evaluator = val_evaluator
|
||||
66
Seg_All_In_One_MMSeg/configs/_base_/datasets/potsdam.py
Normal file
66
Seg_All_In_One_MMSeg/configs/_base_/datasets/potsdam.py
Normal file
@@ -0,0 +1,66 @@
|
||||
# dataset settings
|
||||
dataset_type = 'PotsdamDataset'
|
||||
data_root = 'data/potsdam'
|
||||
crop_size = (512, 512)
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='LoadAnnotations', reduce_zero_label=True),
|
||||
dict(
|
||||
type='RandomResize',
|
||||
scale=(512, 512),
|
||||
ratio_range=(0.5, 2.0),
|
||||
keep_ratio=True),
|
||||
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
|
||||
dict(type='RandomFlip', prob=0.5),
|
||||
dict(type='PhotoMetricDistortion'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='Resize', scale=(512, 512), keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
dict(type='LoadAnnotations', reduce_zero_label=True),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
|
||||
tta_pipeline = [
|
||||
dict(type='LoadImageFromFile', backend_args=None),
|
||||
dict(
|
||||
type='TestTimeAug',
|
||||
transforms=[
|
||||
[
|
||||
dict(type='Resize', scale_factor=r, keep_ratio=True)
|
||||
for r in img_ratios
|
||||
],
|
||||
[
|
||||
dict(type='RandomFlip', prob=0., direction='horizontal'),
|
||||
dict(type='RandomFlip', prob=1., direction='horizontal')
|
||||
], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
|
||||
])
|
||||
]
|
||||
train_dataloader = dict(
|
||||
batch_size=4,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='InfiniteSampler', shuffle=True),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(
|
||||
img_path='img_dir/train', seg_map_path='ann_dir/train'),
|
||||
pipeline=train_pipeline))
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'),
|
||||
pipeline=test_pipeline))
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
|
||||
test_evaluator = val_evaluator
|
||||
@@ -0,0 +1,64 @@
|
||||
# dataset settings
|
||||
dataset_class_name = 'PublicDataSet_AutoLaparo' # TODO 上一步中你定义的数据集的名字
|
||||
data_root = '/home/wkmgc/Desktop/Seg/DataSet_Public/2_AutoLaparo-10Type-1920x1080' # TODO 数据集存储路径
|
||||
# img_norm_cfg = dict(
|
||||
# mean=[33.30, 35.03, 47.23], std=[48.00, 50.4, 60.51], to_rgb=True) # TODO 数据集的均值和标准差,空引用默认的,也可以网上搜代码计算
|
||||
img_scale = (1920, 1080) # img_scale图像尺寸 TODO (1920,1080)
|
||||
crop_size = (256, 256) # 数据增强时裁剪的大小 TODO 之后可以修改
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='LoadAnnotations'), # ", reduce_zero_label=False" TODO 是否忽略0直选项
|
||||
dict(type='RandomResize', scale=img_scale, ratio_range=(0.5, 2.0)),
|
||||
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
|
||||
dict(type='RandomFlip', prob=0.5),
|
||||
dict(type='PhotoMetricDistortion'),
|
||||
# dict(type='GenerateEdge', edge_width=4), # For pidnet
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='Resize', scale=img_scale, keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
train_dataloader = dict( # Train dataloader config
|
||||
batch_size=16, # Batch size of a single GPU TODO
|
||||
num_workers=4, # Worker to pre-fetch data for each single GPU
|
||||
persistent_workers=True, # Shut down the worker processes after an epoch end, which can accelerate training speed.
|
||||
sampler=dict(type='DefaultSampler', shuffle=True), # Randomly shuffle during training.
|
||||
dataset=dict( # Train dataset config
|
||||
type=dataset_class_name, # Type of dataset, refer to mmseg/datasets/ for details.
|
||||
data_root=data_root, # The root of dataset.
|
||||
data_prefix=dict(
|
||||
img_path='images/train',
|
||||
seg_map_path='labels_GT/train'),
|
||||
pipeline=train_pipeline)) # Processing pipeline. This is passed by the train_pipeline created before.
|
||||
val_dataloader = dict(
|
||||
batch_size=1, # Batch size of a single GPU
|
||||
num_workers=4, # Worker to pre-fetch data for each single GPU
|
||||
persistent_workers=True, # Shut down the worker processes after an epoch end, which can accelerate testing speed.
|
||||
sampler=dict(type='DefaultSampler', shuffle=False), # Not shuffle during validation and testing.
|
||||
dataset=dict( # Test dataset config
|
||||
type=dataset_class_name, # Type of dataset, refer to mmseg/datasets/ for details.
|
||||
data_root=data_root, # The root of dataset.
|
||||
data_prefix=dict(
|
||||
img_path='images/val',
|
||||
seg_map_path='labels_GT/val'),
|
||||
pipeline=test_pipeline)) # Processing pipeline. This is passed by the test_pipeline created before.
|
||||
test_dataloader = dict(
|
||||
batch_size=1, # Batch size of a single GPU
|
||||
num_workers=4, # Worker to pre-fetch data for each single GPU
|
||||
persistent_workers=True, # Shut down the worker processes after an epoch end, which can accelerate testing speed.
|
||||
sampler=dict(type='DefaultSampler', shuffle=False), # Not shuffle during validation and testing.
|
||||
dataset=dict( # Test dataset config
|
||||
type=dataset_class_name, # Type of dataset, refer to mmseg/datasets/ for details.
|
||||
data_root=data_root, # The root of dataset.
|
||||
data_prefix=dict(
|
||||
img_path='images/val',
|
||||
seg_map_path='labels_GT/val'),
|
||||
pipeline=test_pipeline)) # Processing pipeline. This is passed by the test_pipeline created before.
|
||||
# The metric to measure the accuracy. Here, we use IoUMetric.
|
||||
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
|
||||
test_evaluator = val_evaluator
|
||||
@@ -0,0 +1,64 @@
|
||||
# dataset settings
|
||||
dataset_class_name = 'PublicDataSet_CholecSeg8k' # TODO 上一步中你定义的数据集的名字
|
||||
data_root = '/home/wkmgc/Desktop/Seg/DataSet_Public/1_CholecSeg8k-13Type-1920x1080' # TODO 数据集存储路径
|
||||
# img_norm_cfg = dict(
|
||||
# mean=[33.30, 35.03, 47.23], std=[48.00, 50.4, 60.51], to_rgb=True) # TODO 数据集的均值和标准差,空引用默认的,也可以网上搜代码计算
|
||||
img_scale = (1920, 1080) # img_scale图像尺寸 TODO (1920,1080)
|
||||
crop_size = (256, 256) # 数据增强时裁剪的大小 TODO 之后可以修改
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='LoadAnnotations'), # ", reduce_zero_label=False" TODO 是否忽略0直选项
|
||||
dict(type='RandomResize', scale=img_scale, ratio_range=(0.5, 2.0)),
|
||||
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
|
||||
dict(type='RandomFlip', prob=0.5),
|
||||
dict(type='PhotoMetricDistortion'),
|
||||
# dict(type='GenerateEdge', edge_width=4), # For pidnet
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='Resize', scale=img_scale, keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
train_dataloader = dict( # Train dataloader config
|
||||
batch_size=16, # Batch size of a single GPU TODO
|
||||
num_workers=4, # Worker to pre-fetch data for each single GPU
|
||||
persistent_workers=True, # Shut down the worker processes after an epoch end, which can accelerate training speed.
|
||||
sampler=dict(type='DefaultSampler', shuffle=True), # Randomly shuffle during training.
|
||||
dataset=dict( # Train dataset config
|
||||
type=dataset_class_name, # Type of dataset, refer to mmseg/datasets/ for details.
|
||||
data_root=data_root, # The root of dataset.
|
||||
data_prefix=dict(
|
||||
img_path='images/train',
|
||||
seg_map_path='labels_GT/train'),
|
||||
pipeline=train_pipeline)) # Processing pipeline. This is passed by the train_pipeline created before.
|
||||
val_dataloader = dict(
|
||||
batch_size=1, # Batch size of a single GPU
|
||||
num_workers=4, # Worker to pre-fetch data for each single GPU
|
||||
persistent_workers=True, # Shut down the worker processes after an epoch end, which can accelerate testing speed.
|
||||
sampler=dict(type='DefaultSampler', shuffle=False), # Not shuffle during validation and testing.
|
||||
dataset=dict( # Test dataset config
|
||||
type=dataset_class_name, # Type of dataset, refer to mmseg/datasets/ for details.
|
||||
data_root=data_root, # The root of dataset.
|
||||
data_prefix=dict(
|
||||
img_path='images/val',
|
||||
seg_map_path='labels_GT/val'),
|
||||
pipeline=test_pipeline)) # Processing pipeline. This is passed by the test_pipeline created before.
|
||||
test_dataloader = dict(
|
||||
batch_size=1, # Batch size of a single GPU
|
||||
num_workers=4, # Worker to pre-fetch data for each single GPU
|
||||
persistent_workers=True, # Shut down the worker processes after an epoch end, which can accelerate testing speed.
|
||||
sampler=dict(type='DefaultSampler', shuffle=False), # Not shuffle during validation and testing.
|
||||
dataset=dict( # Test dataset config
|
||||
type=dataset_class_name, # Type of dataset, refer to mmseg/datasets/ for details.
|
||||
data_root=data_root, # The root of dataset.
|
||||
data_prefix=dict(
|
||||
img_path='images/val',
|
||||
seg_map_path='labels_GT/val'),
|
||||
pipeline=test_pipeline)) # Processing pipeline. This is passed by the test_pipeline created before.
|
||||
# The metric to measure the accuracy. Here, we use IoUMetric.
|
||||
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
|
||||
test_evaluator = val_evaluator
|
||||
@@ -0,0 +1,64 @@
|
||||
# dataset settings
|
||||
dataset_class_name = 'PublicDataSet_Dresden' # TODO 上一步中你定义的数据集的名字
|
||||
data_root = '/home/wkmgc/Desktop/Seg/DataSet_Public/4_Dresden-11Type-512x512' # TODO 数据集存储路径
|
||||
# img_norm_cfg = dict(
|
||||
# mean=[33.30, 35.03, 47.23], std=[48.00, 50.4, 60.51], to_rgb=True) # TODO 数据集的均值和标准差,空引用默认的,也可以网上搜代码计算
|
||||
img_scale = (512, 512) # img_scale图像尺寸 TODO (1920,1080)
|
||||
crop_size = (256, 256) # 数据增强时裁剪的大小 TODO 之后可以修改
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='LoadAnnotations'), # ", reduce_zero_label=False" TODO 是否忽略0直选项
|
||||
dict(type='RandomResize', scale=img_scale, ratio_range=(0.5, 2.0)),
|
||||
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
|
||||
dict(type='RandomFlip', prob=0.5),
|
||||
dict(type='PhotoMetricDistortion'),
|
||||
# dict(type='GenerateEdge', edge_width=4), # For pidnet
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='Resize', scale=img_scale, keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
train_dataloader = dict( # Train dataloader config
|
||||
batch_size=16, # Batch size of a single GPU TODO
|
||||
num_workers=4, # Worker to pre-fetch data for each single GPU
|
||||
persistent_workers=True, # Shut down the worker processes after an epoch end, which can accelerate training speed.
|
||||
sampler=dict(type='DefaultSampler', shuffle=True), # Randomly shuffle during training.
|
||||
dataset=dict( # Train dataset config
|
||||
type=dataset_class_name, # Type of dataset, refer to mmseg/datasets/ for details.
|
||||
data_root=data_root, # The root of dataset.
|
||||
data_prefix=dict(
|
||||
img_path='images/train',
|
||||
seg_map_path='labels_GT/train'),
|
||||
pipeline=train_pipeline)) # Processing pipeline. This is passed by the train_pipeline created before.
|
||||
val_dataloader = dict(
|
||||
batch_size=1, # Batch size of a single GPU
|
||||
num_workers=4, # Worker to pre-fetch data for each single GPU
|
||||
persistent_workers=True, # Shut down the worker processes after an epoch end, which can accelerate testing speed.
|
||||
sampler=dict(type='DefaultSampler', shuffle=False), # Not shuffle during validation and testing.
|
||||
dataset=dict( # Test dataset config
|
||||
type=dataset_class_name, # Type of dataset, refer to mmseg/datasets/ for details.
|
||||
data_root=data_root, # The root of dataset.
|
||||
data_prefix=dict(
|
||||
img_path='images/val',
|
||||
seg_map_path='labels_GT/val'),
|
||||
pipeline=test_pipeline)) # Processing pipeline. This is passed by the test_pipeline created before.
|
||||
test_dataloader = dict(
|
||||
batch_size=1, # Batch size of a single GPU
|
||||
num_workers=4, # Worker to pre-fetch data for each single GPU
|
||||
persistent_workers=True, # Shut down the worker processes after an epoch end, which can accelerate testing speed.
|
||||
sampler=dict(type='DefaultSampler', shuffle=False), # Not shuffle during validation and testing.
|
||||
dataset=dict( # Test dataset config
|
||||
type=dataset_class_name, # Type of dataset, refer to mmseg/datasets/ for details.
|
||||
data_root=data_root, # The root of dataset.
|
||||
data_prefix=dict(
|
||||
img_path='images/test',
|
||||
seg_map_path='labels_GT/test'),
|
||||
pipeline=test_pipeline)) # Processing pipeline. This is passed by the test_pipeline created before.
|
||||
# The metric to measure the accuracy. Here, we use IoUMetric.
|
||||
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
|
||||
test_evaluator = val_evaluator
|
||||
@@ -0,0 +1,64 @@
|
||||
# dataset settings
|
||||
dataset_class_name = 'PublicDataSet_Endovis_2017' # TODO 上一步中你定义的数据集的名字
|
||||
data_root = '/home/wkmgc/Desktop/Seg/DataSet_Public/3_1_Endovis_2017-8Type-512x512' # TODO 数据集存储路径
|
||||
# img_norm_cfg = dict(
|
||||
# mean=[33.30, 35.03, 47.23], std=[48.00, 50.4, 60.51], to_rgb=True) # TODO 数据集的均值和标准差,空引用默认的,也可以网上搜代码计算
|
||||
img_scale = (512, 512) # img_scale图像尺寸 TODO (1920,1080)
|
||||
crop_size = (256, 256) # 数据增强时裁剪的大小 TODO 之后可以修改
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='LoadAnnotations'), # ", reduce_zero_label=False" TODO 是否忽略0直选项
|
||||
dict(type='RandomResize', scale=img_scale, ratio_range=(0.5, 2.0)),
|
||||
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
|
||||
dict(type='RandomFlip', prob=0.5),
|
||||
dict(type='PhotoMetricDistortion'),
|
||||
# dict(type='GenerateEdge', edge_width=4), # For pidnet
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='Resize', scale=img_scale, keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
train_dataloader = dict( # Train dataloader config
|
||||
batch_size=16, # Batch size of a single GPU TODO
|
||||
num_workers=4, # Worker to pre-fetch data for each single GPU
|
||||
persistent_workers=True, # Shut down the worker processes after an epoch end, which can accelerate training speed.
|
||||
sampler=dict(type='DefaultSampler', shuffle=True), # Randomly shuffle during training.
|
||||
dataset=dict( # Train dataset config
|
||||
type=dataset_class_name, # Type of dataset, refer to mmseg/datasets/ for details.
|
||||
data_root=data_root, # The root of dataset.
|
||||
data_prefix=dict(
|
||||
img_path='images/train',
|
||||
seg_map_path='labels_GT/train'),
|
||||
pipeline=train_pipeline)) # Processing pipeline. This is passed by the train_pipeline created before.
|
||||
val_dataloader = dict(
|
||||
batch_size=1, # Batch size of a single GPU
|
||||
num_workers=4, # Worker to pre-fetch data for each single GPU
|
||||
persistent_workers=True, # Shut down the worker processes after an epoch end, which can accelerate testing speed.
|
||||
sampler=dict(type='DefaultSampler', shuffle=False), # Not shuffle during validation and testing.
|
||||
dataset=dict( # Test dataset config
|
||||
type=dataset_class_name, # Type of dataset, refer to mmseg/datasets/ for details.
|
||||
data_root=data_root, # The root of dataset.
|
||||
data_prefix=dict(
|
||||
img_path='images/val',
|
||||
seg_map_path='labels_GT/val'),
|
||||
pipeline=test_pipeline)) # Processing pipeline. This is passed by the test_pipeline created before.
|
||||
test_dataloader = dict(
|
||||
batch_size=1, # Batch size of a single GPU
|
||||
num_workers=4, # Worker to pre-fetch data for each single GPU
|
||||
persistent_workers=True, # Shut down the worker processes after an epoch end, which can accelerate testing speed.
|
||||
sampler=dict(type='DefaultSampler', shuffle=False), # Not shuffle during validation and testing.
|
||||
dataset=dict( # Test dataset config
|
||||
type=dataset_class_name, # Type of dataset, refer to mmseg/datasets/ for details.
|
||||
data_root=data_root, # The root of dataset.
|
||||
data_prefix=dict(
|
||||
img_path='images/val',
|
||||
seg_map_path='labels_GT/val'),
|
||||
pipeline=test_pipeline)) # Processing pipeline. This is passed by the test_pipeline created before.
|
||||
# The metric to measure the accuracy. Here, we use IoUMetric.
|
||||
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
|
||||
test_evaluator = val_evaluator
|
||||
@@ -0,0 +1,64 @@
|
||||
# dataset settings
|
||||
dataset_class_name = 'PublicDataSet_Endovis_2018' # TODO 上一步中你定义的数据集的名字
|
||||
data_root = '/home/wkmgc/Desktop/Seg/DataSet_Public/3_2_Endovis_2018-8Type-512x512' # TODO 数据集存储路径
|
||||
# img_norm_cfg = dict(
|
||||
# mean=[33.30, 35.03, 47.23], std=[48.00, 50.4, 60.51], to_rgb=True) # TODO 数据集的均值和标准差,空引用默认的,也可以网上搜代码计算
|
||||
img_scale = (512, 512) # img_scale图像尺寸 TODO (1920,1080)
|
||||
crop_size = (256, 256) # 数据增强时裁剪的大小 TODO 之后可以修改
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='LoadAnnotations'), # ", reduce_zero_label=False" TODO 是否忽略0直选项
|
||||
dict(type='RandomResize', scale=img_scale, ratio_range=(0.5, 2.0)),
|
||||
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
|
||||
dict(type='RandomFlip', prob=0.5),
|
||||
dict(type='PhotoMetricDistortion'),
|
||||
# dict(type='GenerateEdge', edge_width=4), # For pidnet
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='Resize', scale=img_scale, keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
train_dataloader = dict( # Train dataloader config
|
||||
batch_size=16, # Batch size of a single GPU TODO
|
||||
num_workers=4, # Worker to pre-fetch data for each single GPU
|
||||
persistent_workers=True, # Shut down the worker processes after an epoch end, which can accelerate training speed.
|
||||
sampler=dict(type='DefaultSampler', shuffle=True), # Randomly shuffle during training.
|
||||
dataset=dict( # Train dataset config
|
||||
type=dataset_class_name, # Type of dataset, refer to mmseg/datasets/ for details.
|
||||
data_root=data_root, # The root of dataset.
|
||||
data_prefix=dict(
|
||||
img_path='images/train',
|
||||
seg_map_path='labels_GT/train'),
|
||||
pipeline=train_pipeline)) # Processing pipeline. This is passed by the train_pipeline created before.
|
||||
val_dataloader = dict(
|
||||
batch_size=1, # Batch size of a single GPU
|
||||
num_workers=4, # Worker to pre-fetch data for each single GPU
|
||||
persistent_workers=True, # Shut down the worker processes after an epoch end, which can accelerate testing speed.
|
||||
sampler=dict(type='DefaultSampler', shuffle=False), # Not shuffle during validation and testing.
|
||||
dataset=dict( # Test dataset config
|
||||
type=dataset_class_name, # Type of dataset, refer to mmseg/datasets/ for details.
|
||||
data_root=data_root, # The root of dataset.
|
||||
data_prefix=dict(
|
||||
img_path='images/val',
|
||||
seg_map_path='labels_GT/val'),
|
||||
pipeline=test_pipeline)) # Processing pipeline. This is passed by the test_pipeline created before.
|
||||
test_dataloader = dict(
|
||||
batch_size=1, # Batch size of a single GPU
|
||||
num_workers=4, # Worker to pre-fetch data for each single GPU
|
||||
persistent_workers=True, # Shut down the worker processes after an epoch end, which can accelerate testing speed.
|
||||
sampler=dict(type='DefaultSampler', shuffle=False), # Not shuffle during validation and testing.
|
||||
dataset=dict( # Test dataset config
|
||||
type=dataset_class_name, # Type of dataset, refer to mmseg/datasets/ for details.
|
||||
data_root=data_root, # The root of dataset.
|
||||
data_prefix=dict(
|
||||
img_path='images/val',
|
||||
seg_map_path='labels_GT/val'),
|
||||
pipeline=test_pipeline)) # Processing pipeline. This is passed by the test_pipeline created before.
|
||||
# The metric to measure the accuracy. Here, we use IoUMetric.
|
||||
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
|
||||
test_evaluator = val_evaluator
|
||||
90
Seg_All_In_One_MMSeg/configs/_base_/datasets/refuge.py
Normal file
90
Seg_All_In_One_MMSeg/configs/_base_/datasets/refuge.py
Normal file
@@ -0,0 +1,90 @@
|
||||
# dataset settings
|
||||
dataset_type = 'REFUGEDataset'
|
||||
data_root = 'data/REFUGE'
|
||||
train_img_scale = (2056, 2124)
|
||||
val_img_scale = (1634, 1634)
|
||||
test_img_scale = (1634, 1634)
|
||||
crop_size = (512, 512)
|
||||
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='LoadAnnotations', reduce_zero_label=False),
|
||||
dict(
|
||||
type='RandomResize',
|
||||
scale=train_img_scale,
|
||||
ratio_range=(0.5, 2.0),
|
||||
keep_ratio=True),
|
||||
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
|
||||
dict(type='RandomFlip', prob=0.5),
|
||||
dict(type='PhotoMetricDistortion'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
val_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='Resize', scale=val_img_scale, keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
dict(type='LoadAnnotations', reduce_zero_label=False),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='Resize', scale=test_img_scale, keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
dict(type='LoadAnnotations', reduce_zero_label=False),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
|
||||
tta_pipeline = [
|
||||
dict(type='LoadImageFromFile', backend_args=dict(backend='local')),
|
||||
dict(
|
||||
type='TestTimeAug',
|
||||
transforms=[
|
||||
[
|
||||
dict(type='Resize', scale_factor=r, keep_ratio=True)
|
||||
for r in img_ratios
|
||||
],
|
||||
[
|
||||
dict(type='RandomFlip', prob=0., direction='horizontal'),
|
||||
dict(type='RandomFlip', prob=1., direction='horizontal')
|
||||
], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
|
||||
])
|
||||
]
|
||||
train_dataloader = dict(
|
||||
batch_size=4,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='InfiniteSampler', shuffle=True),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(
|
||||
img_path='images/training', seg_map_path='annotations/training'),
|
||||
pipeline=train_pipeline))
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(
|
||||
img_path='images/validation',
|
||||
seg_map_path='annotations/validation'),
|
||||
pipeline=val_pipeline))
|
||||
test_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(
|
||||
img_path='images/test', seg_map_path='annotations/test'),
|
||||
pipeline=val_pipeline))
|
||||
|
||||
val_evaluator = dict(type='IoUMetric', iou_metrics=['mDice'])
|
||||
test_evaluator = val_evaluator
|
||||
73
Seg_All_In_One_MMSeg/configs/_base_/datasets/stare.py
Normal file
73
Seg_All_In_One_MMSeg/configs/_base_/datasets/stare.py
Normal file
@@ -0,0 +1,73 @@
|
||||
# dataset settings
|
||||
dataset_type = 'STAREDataset'
|
||||
data_root = 'data/STARE'
|
||||
img_scale = (605, 700)
|
||||
crop_size = (128, 128)
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(
|
||||
type='RandomResize',
|
||||
scale=img_scale,
|
||||
ratio_range=(0.5, 2.0),
|
||||
keep_ratio=True),
|
||||
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
|
||||
dict(type='RandomFlip', prob=0.5),
|
||||
dict(type='PhotoMetricDistortion'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='Resize', scale=img_scale, keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
|
||||
tta_pipeline = [
|
||||
dict(type='LoadImageFromFile', backend_args=None),
|
||||
dict(
|
||||
type='TestTimeAug',
|
||||
transforms=[
|
||||
[
|
||||
dict(type='Resize', scale_factor=r, keep_ratio=True)
|
||||
for r in img_ratios
|
||||
],
|
||||
[
|
||||
dict(type='RandomFlip', prob=0., direction='horizontal'),
|
||||
dict(type='RandomFlip', prob=1., direction='horizontal')
|
||||
], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
|
||||
])
|
||||
]
|
||||
train_dataloader = dict(
|
||||
batch_size=4,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='InfiniteSampler', shuffle=True),
|
||||
dataset=dict(
|
||||
type='RepeatDataset',
|
||||
times=40000,
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(
|
||||
img_path='images/training',
|
||||
seg_map_path='annotations/training'),
|
||||
pipeline=train_pipeline)))
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(
|
||||
img_path='images/validation',
|
||||
seg_map_path='annotations/validation'),
|
||||
pipeline=test_pipeline))
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
val_evaluator = dict(type='IoUMetric', iou_metrics=['mDice'])
|
||||
test_evaluator = val_evaluator
|
||||
41
Seg_All_In_One_MMSeg/configs/_base_/datasets/synapse.py
Normal file
41
Seg_All_In_One_MMSeg/configs/_base_/datasets/synapse.py
Normal file
@@ -0,0 +1,41 @@
|
||||
dataset_type = 'SynapseDataset'
|
||||
data_root = 'data/synapse/'
|
||||
img_scale = (224, 224)
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(type='Resize', scale=img_scale, keep_ratio=True),
|
||||
dict(type='RandomRotFlip', rotate_prob=0.5, flip_prob=0.5, degree=20),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='Resize', scale=img_scale, keep_ratio=True),
|
||||
dict(type='LoadAnnotations'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
train_dataloader = dict(
|
||||
batch_size=6,
|
||||
num_workers=2,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='InfiniteSampler', shuffle=True),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(
|
||||
img_path='img_dir/train', seg_map_path='ann_dir/train'),
|
||||
pipeline=train_pipeline))
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'),
|
||||
pipeline=test_pipeline))
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
val_evaluator = dict(type='IoUMetric', iou_metrics=['mDice'])
|
||||
test_evaluator = val_evaluator
|
||||
66
Seg_All_In_One_MMSeg/configs/_base_/datasets/vaihingen.py
Normal file
66
Seg_All_In_One_MMSeg/configs/_base_/datasets/vaihingen.py
Normal file
@@ -0,0 +1,66 @@
|
||||
# dataset settings
|
||||
dataset_type = 'ISPRSDataset'
|
||||
data_root = 'data/vaihingen'
|
||||
crop_size = (512, 512)
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='LoadAnnotations', reduce_zero_label=True),
|
||||
dict(
|
||||
type='RandomResize',
|
||||
scale=(512, 512),
|
||||
ratio_range=(0.5, 2.0),
|
||||
keep_ratio=True),
|
||||
dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
|
||||
dict(type='RandomFlip', prob=0.5),
|
||||
dict(type='PhotoMetricDistortion'),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='Resize', scale=(512, 512), keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
dict(type='LoadAnnotations', reduce_zero_label=True),
|
||||
dict(type='PackSegInputs')
|
||||
]
|
||||
img_ratios = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
|
||||
tta_pipeline = [
|
||||
dict(type='LoadImageFromFile', backend_args=None),
|
||||
dict(
|
||||
type='TestTimeAug',
|
||||
transforms=[
|
||||
[
|
||||
dict(type='Resize', scale_factor=r, keep_ratio=True)
|
||||
for r in img_ratios
|
||||
],
|
||||
[
|
||||
dict(type='RandomFlip', prob=0., direction='horizontal'),
|
||||
dict(type='RandomFlip', prob=1., direction='horizontal')
|
||||
], [dict(type='LoadAnnotations')], [dict(type='PackSegInputs')]
|
||||
])
|
||||
]
|
||||
train_dataloader = dict(
|
||||
batch_size=4,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='InfiniteSampler', shuffle=True),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(
|
||||
img_path='img_dir/train', seg_map_path='ann_dir/train'),
|
||||
pipeline=train_pipeline))
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'),
|
||||
pipeline=test_pipeline))
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
|
||||
test_evaluator = val_evaluator
|
||||
15
Seg_All_In_One_MMSeg/configs/_base_/default_runtime.py
Normal file
15
Seg_All_In_One_MMSeg/configs/_base_/default_runtime.py
Normal file
@@ -0,0 +1,15 @@
|
||||
default_scope = 'mmseg'
|
||||
env_cfg = dict(
|
||||
cudnn_benchmark=True,
|
||||
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
|
||||
dist_cfg=dict(backend='nccl'),
|
||||
)
|
||||
vis_backends = [dict(type='LocalVisBackend')]
|
||||
visualizer = dict(
|
||||
type='SegLocalVisualizer', vis_backends=vis_backends, name='visualizer')
|
||||
log_processor = dict(by_epoch=False)
|
||||
log_level = 'INFO'
|
||||
load_from = None
|
||||
resume = False
|
||||
|
||||
tta_model = dict(type='SegTTAModel')
|
||||
54
Seg_All_In_One_MMSeg/configs/_base_/models/ann_r50-d8.py
Normal file
54
Seg_All_In_One_MMSeg/configs/_base_/models/ann_r50-d8.py
Normal file
@@ -0,0 +1,54 @@
|
||||
# model settings
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
pretrained='open-mmlab://resnet50_v1c',
|
||||
backbone=dict(
|
||||
type='ResNetV1c',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
dilations=(1, 1, 2, 4),
|
||||
strides=(1, 2, 1, 1),
|
||||
norm_cfg=norm_cfg,
|
||||
norm_eval=False,
|
||||
style='pytorch',
|
||||
contract_dilation=True),
|
||||
decode_head=dict(
|
||||
type='ANNHead',
|
||||
in_channels=[1024, 2048],
|
||||
in_index=[2, 3],
|
||||
channels=512,
|
||||
project_channels=256,
|
||||
query_scales=(1, ),
|
||||
key_pool_scales=(1, 3, 6, 8),
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
auxiliary_head=dict(
|
||||
type='FCNHead',
|
||||
in_channels=1024,
|
||||
in_index=2,
|
||||
channels=256,
|
||||
num_convs=1,
|
||||
concat_input=False,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
52
Seg_All_In_One_MMSeg/configs/_base_/models/apcnet_r50-d8.py
Normal file
52
Seg_All_In_One_MMSeg/configs/_base_/models/apcnet_r50-d8.py
Normal file
@@ -0,0 +1,52 @@
|
||||
# model settings
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
pretrained='open-mmlab://resnet50_v1c',
|
||||
backbone=dict(
|
||||
type='ResNetV1c',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
dilations=(1, 1, 2, 4),
|
||||
strides=(1, 2, 1, 1),
|
||||
norm_cfg=norm_cfg,
|
||||
norm_eval=False,
|
||||
style='pytorch',
|
||||
contract_dilation=True),
|
||||
decode_head=dict(
|
||||
type='APCHead',
|
||||
in_channels=2048,
|
||||
in_index=3,
|
||||
channels=512,
|
||||
pool_scales=(1, 2, 3, 6),
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=dict(type='SyncBN', requires_grad=True),
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
auxiliary_head=dict(
|
||||
type='FCNHead',
|
||||
in_channels=1024,
|
||||
in_index=2,
|
||||
channels=256,
|
||||
num_convs=1,
|
||||
concat_input=False,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
@@ -0,0 +1,78 @@
|
||||
# model settings
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
backbone=dict(
|
||||
type='BiSeNetV1',
|
||||
in_channels=3,
|
||||
context_channels=(128, 256, 512), # (512, 1024, 2048), # (512, 1024, 2048),
|
||||
spatial_channels=(64, 64, 64, 128), # (256, 256, 256, 512), # (256, 256, 256, 512),
|
||||
out_indices=(0, 1, 2),
|
||||
out_channels=256, # 1024, # 1024,
|
||||
backbone_cfg=dict(
|
||||
type='ResNet',
|
||||
in_channels=3,
|
||||
depth=18, # 50, # 101,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
dilations=(1, 1, 1, 1),
|
||||
strides=(1, 2, 2, 2),
|
||||
norm_cfg=norm_cfg,
|
||||
norm_eval=False,
|
||||
style='pytorch',
|
||||
contract_dilation=True
|
||||
# init_cfg=dict(type='Pretrained', checkpoint='open-mmlab://resnet18_v1c')
|
||||
),
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
init_cfg=None),
|
||||
decode_head=dict(
|
||||
type='FCNHead',
|
||||
in_channels=256, # 1024 # 1024
|
||||
in_index=0,
|
||||
channels=256, # 1024 # 1024
|
||||
num_convs=1,
|
||||
concat_input=False,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
auxiliary_head=[
|
||||
dict(
|
||||
type='FCNHead',
|
||||
in_channels=128, # 512
|
||||
channels=64, # 256
|
||||
num_convs=1,
|
||||
num_classes=19,
|
||||
in_index=1,
|
||||
norm_cfg=norm_cfg,
|
||||
concat_input=False,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
dict(
|
||||
type='FCNHead',
|
||||
in_channels=128, # 512 # 512
|
||||
channels=64, # 256 # 256
|
||||
num_convs=1,
|
||||
num_classes=19,
|
||||
in_index=2,
|
||||
norm_cfg=norm_cfg,
|
||||
concat_input=False,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
],
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
88
Seg_All_In_One_MMSeg/configs/_base_/models/bisenetv2.py
Normal file
88
Seg_All_In_One_MMSeg/configs/_base_/models/bisenetv2.py
Normal file
@@ -0,0 +1,88 @@
|
||||
# model settings
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
pretrained=None,
|
||||
backbone=dict(
|
||||
type='BiSeNetV2',
|
||||
detail_channels=(64, 64, 128),
|
||||
semantic_channels=(16, 32, 64, 128),
|
||||
semantic_expansion_ratio=6,
|
||||
bga_channels=128,
|
||||
out_indices=(0, 1, 2, 3, 4),
|
||||
init_cfg=None,
|
||||
align_corners=False),
|
||||
decode_head=dict(
|
||||
type='FCNHead',
|
||||
in_channels=128,
|
||||
in_index=0,
|
||||
channels=1024,
|
||||
num_convs=1,
|
||||
concat_input=False,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
auxiliary_head=[
|
||||
dict(
|
||||
type='FCNHead',
|
||||
in_channels=16,
|
||||
channels=16,
|
||||
num_convs=2,
|
||||
num_classes=19,
|
||||
in_index=1,
|
||||
norm_cfg=norm_cfg,
|
||||
concat_input=False,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
dict(
|
||||
type='FCNHead',
|
||||
in_channels=32,
|
||||
channels=64,
|
||||
num_convs=2,
|
||||
num_classes=19,
|
||||
in_index=2,
|
||||
norm_cfg=norm_cfg,
|
||||
concat_input=False,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
dict(
|
||||
type='FCNHead',
|
||||
in_channels=64,
|
||||
channels=256,
|
||||
num_convs=2,
|
||||
num_classes=19,
|
||||
in_index=3,
|
||||
norm_cfg=norm_cfg,
|
||||
concat_input=False,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
dict(
|
||||
type='FCNHead',
|
||||
in_channels=128,
|
||||
channels=1024,
|
||||
num_convs=2,
|
||||
num_classes=19,
|
||||
in_index=4,
|
||||
norm_cfg=norm_cfg,
|
||||
concat_input=False,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
],
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
@@ -0,0 +1,88 @@
|
||||
# model settings
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
pretrained=None,
|
||||
backbone=dict(
|
||||
type='BiSeNetV2',
|
||||
detail_channels=(128, 128, 256), # (64, 64, 128)
|
||||
semantic_channels=(32, 64, 128, 256), # (16, 32, 64, 128)
|
||||
semantic_expansion_ratio=6,
|
||||
bga_channels=256, # 128
|
||||
out_indices=(0, 1, 2, 3, 4),
|
||||
init_cfg=None,
|
||||
align_corners=False),
|
||||
decode_head=dict(
|
||||
type='FCNHead',
|
||||
in_channels=256, # 128
|
||||
in_index=0,
|
||||
channels=1024,
|
||||
num_convs=1,
|
||||
concat_input=False,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
auxiliary_head=[
|
||||
dict(
|
||||
type='FCNHead',
|
||||
in_channels=32, # 16
|
||||
channels=16,
|
||||
num_convs=2,
|
||||
num_classes=19,
|
||||
in_index=1,
|
||||
norm_cfg=norm_cfg,
|
||||
concat_input=False,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
dict(
|
||||
type='FCNHead',
|
||||
in_channels=64, # 32
|
||||
channels=64,
|
||||
num_convs=2,
|
||||
num_classes=19,
|
||||
in_index=2,
|
||||
norm_cfg=norm_cfg,
|
||||
concat_input=False,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
dict(
|
||||
type='FCNHead',
|
||||
in_channels=128, # 64
|
||||
channels=256,
|
||||
num_convs=2,
|
||||
num_classes=19,
|
||||
in_index=3,
|
||||
norm_cfg=norm_cfg,
|
||||
concat_input=False,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
dict(
|
||||
type='FCNHead',
|
||||
in_channels=256, # 128
|
||||
channels=1024,
|
||||
num_convs=2,
|
||||
num_classes=19,
|
||||
in_index=4,
|
||||
norm_cfg=norm_cfg,
|
||||
concat_input=False,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
],
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
52
Seg_All_In_One_MMSeg/configs/_base_/models/ccnet_r50-d8.py
Normal file
52
Seg_All_In_One_MMSeg/configs/_base_/models/ccnet_r50-d8.py
Normal file
@@ -0,0 +1,52 @@
|
||||
# model settings
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
pretrained='open-mmlab://resnet50_v1c',
|
||||
backbone=dict(
|
||||
type='ResNetV1c',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
dilations=(1, 1, 2, 4),
|
||||
strides=(1, 2, 1, 1),
|
||||
norm_cfg=norm_cfg,
|
||||
norm_eval=False,
|
||||
style='pytorch',
|
||||
contract_dilation=True),
|
||||
decode_head=dict(
|
||||
type='CCHead',
|
||||
in_channels=2048,
|
||||
in_index=3,
|
||||
channels=512,
|
||||
recurrence=2,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
auxiliary_head=dict(
|
||||
type='FCNHead',
|
||||
in_channels=1024,
|
||||
in_index=2,
|
||||
channels=256,
|
||||
num_convs=1,
|
||||
concat_input=False,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
44
Seg_All_In_One_MMSeg/configs/_base_/models/cgnet.py
Normal file
44
Seg_All_In_One_MMSeg/configs/_base_/models/cgnet.py
Normal file
@@ -0,0 +1,44 @@
|
||||
# model settings
|
||||
norm_cfg = dict(type='SyncBN', eps=1e-03, requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[72.39239876, 82.90891754, 73.15835921],
|
||||
std=[1, 1, 1],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
backbone=dict(
|
||||
type='CGNet',
|
||||
norm_cfg=norm_cfg,
|
||||
in_channels=3,
|
||||
num_channels=(32, 64, 128),
|
||||
num_blocks=(3, 21),
|
||||
dilations=(2, 4),
|
||||
reductions=(8, 16)),
|
||||
decode_head=dict(
|
||||
type='FCNHead',
|
||||
in_channels=256,
|
||||
in_index=2,
|
||||
channels=256,
|
||||
num_convs=0,
|
||||
concat_input=False,
|
||||
dropout_ratio=0,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss',
|
||||
use_sigmoid=False,
|
||||
loss_weight=1.0,
|
||||
class_weight=[
|
||||
2.5959933, 6.7415504, 3.5354059, 9.8663225, 9.690899, 9.369352,
|
||||
10.289121, 9.953208, 4.3097677, 9.490387, 7.674431, 9.396905,
|
||||
10.347791, 6.3927646, 10.226669, 10.241062, 10.280587,
|
||||
10.396974, 10.055647
|
||||
]
|
||||
)),
|
||||
# model training and testing settings
|
||||
train_cfg=dict(sampler=None),
|
||||
test_cfg=dict(mode='whole'))
|
||||
52
Seg_All_In_One_MMSeg/configs/_base_/models/danet_r50-d8.py
Normal file
52
Seg_All_In_One_MMSeg/configs/_base_/models/danet_r50-d8.py
Normal file
@@ -0,0 +1,52 @@
|
||||
# model settings
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
pretrained='open-mmlab://resnet50_v1c',
|
||||
backbone=dict(
|
||||
type='ResNetV1c',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
dilations=(1, 1, 2, 4),
|
||||
strides=(1, 2, 1, 1),
|
||||
norm_cfg=norm_cfg,
|
||||
norm_eval=False,
|
||||
style='pytorch',
|
||||
contract_dilation=True),
|
||||
decode_head=dict(
|
||||
type='DAHead',
|
||||
in_channels=2048,
|
||||
in_index=3,
|
||||
channels=512,
|
||||
pam_channels=64,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
auxiliary_head=dict(
|
||||
type='FCNHead',
|
||||
in_channels=1024,
|
||||
in_index=2,
|
||||
channels=256,
|
||||
num_convs=1,
|
||||
concat_input=False,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
55
Seg_All_In_One_MMSeg/configs/_base_/models/ddrnet.py
Normal file
55
Seg_All_In_One_MMSeg/configs/_base_/models/ddrnet.py
Normal file
@@ -0,0 +1,55 @@
|
||||
# The class_weight is borrowed from https://github.com/openseg-group/OCNet.pytorch/issues/14 # noqa
|
||||
# Licensed under the MIT License
|
||||
# class_weight = [
|
||||
# 0.8373, 0.918, 0.866, 1.0345, 1.0166, 0.9969, 0.9754, 1.0489, 0.8786,
|
||||
# 1.0023, 0.9539, 0.9843, 1.1116, 0.9037, 1.0865, 1.0955, 1.0865, 1.1529,
|
||||
# 1.0507
|
||||
# ]
|
||||
checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/ddrnet/pretrain/ddrnet23-in1kpre_3rdparty-9ca29f62.pth' # noqa
|
||||
# crop_size = (1024, 1024)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
size=(1024, 1024),
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
backbone=dict(
|
||||
type='DDRNet',
|
||||
in_channels=3,
|
||||
channels=64,
|
||||
ppm_channels=128,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
init_cfg=dict(type='Pretrained', checkpoint=checkpoint)),
|
||||
decode_head=dict(
|
||||
type='DDRHead',
|
||||
in_channels=64 * 4,
|
||||
channels=128,
|
||||
dropout_ratio=0.,
|
||||
num_classes=19,
|
||||
align_corners=False,
|
||||
norm_cfg=norm_cfg,
|
||||
loss_decode=[
|
||||
dict(
|
||||
type='OhemCrossEntropy',
|
||||
thres=0.9,
|
||||
min_kept=131072,
|
||||
# class_weight=class_weight,
|
||||
loss_weight=1.0),
|
||||
dict(
|
||||
type='OhemCrossEntropy',
|
||||
thres=0.9,
|
||||
min_kept=131072,
|
||||
# class_weight=class_weight,s
|
||||
loss_weight=0.4),
|
||||
]),
|
||||
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
@@ -0,0 +1,52 @@
|
||||
# model settings
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
pretrained='open-mmlab://resnet50_v1c',
|
||||
backbone=dict(
|
||||
type='ResNetV1c',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
dilations=(1, 1, 2, 4),
|
||||
strides=(1, 2, 1, 1),
|
||||
norm_cfg=norm_cfg,
|
||||
norm_eval=False,
|
||||
style='pytorch',
|
||||
contract_dilation=True),
|
||||
decode_head=dict(
|
||||
type='ASPPHead',
|
||||
in_channels=2048,
|
||||
in_index=3,
|
||||
channels=512,
|
||||
dilations=(1, 12, 24, 36),
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
auxiliary_head=dict(
|
||||
type='FCNHead',
|
||||
in_channels=1024,
|
||||
in_index=2,
|
||||
channels=256,
|
||||
num_convs=1,
|
||||
concat_input=False,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
@@ -0,0 +1,58 @@
|
||||
# model settings
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
pretrained=None,
|
||||
backbone=dict(
|
||||
type='UNet',
|
||||
in_channels=3,
|
||||
base_channels=64,
|
||||
num_stages=5,
|
||||
strides=(1, 1, 1, 1, 1),
|
||||
enc_num_convs=(2, 2, 2, 2, 2),
|
||||
dec_num_convs=(2, 2, 2, 2),
|
||||
downsamples=(True, True, True, True),
|
||||
enc_dilations=(1, 1, 1, 1, 1),
|
||||
dec_dilations=(1, 1, 1, 1),
|
||||
with_cp=False,
|
||||
conv_cfg=None,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='ReLU'),
|
||||
upsample_cfg=dict(type='InterpConv'),
|
||||
norm_eval=False),
|
||||
decode_head=dict(
|
||||
type='ASPPHead',
|
||||
in_channels=64,
|
||||
in_index=4,
|
||||
channels=16,
|
||||
dilations=(1, 12, 24, 36),
|
||||
dropout_ratio=0.1,
|
||||
num_classes=2,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
auxiliary_head=dict(
|
||||
type='FCNHead',
|
||||
in_channels=128,
|
||||
in_index=3,
|
||||
channels=64,
|
||||
num_convs=1,
|
||||
concat_input=False,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=2,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='slide', crop_size=256, stride=170))
|
||||
@@ -0,0 +1,54 @@
|
||||
# model settings
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
pretrained='open-mmlab://resnet50_v1c',
|
||||
backbone=dict(
|
||||
type='ResNetV1c',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
dilations=(1, 1, 2, 4),
|
||||
strides=(1, 2, 1, 1),
|
||||
norm_cfg=norm_cfg,
|
||||
norm_eval=False,
|
||||
style='pytorch',
|
||||
contract_dilation=True),
|
||||
decode_head=dict(
|
||||
type='DepthwiseSeparableASPPHead',
|
||||
in_channels=2048,
|
||||
in_index=3,
|
||||
channels=512,
|
||||
dilations=(1, 12, 24, 36),
|
||||
c1_in_channels=256,
|
||||
c1_channels=48,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
auxiliary_head=dict(
|
||||
type='FCNHead',
|
||||
in_channels=1024,
|
||||
in_index=2,
|
||||
channels=256,
|
||||
num_convs=1,
|
||||
concat_input=False,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
52
Seg_All_In_One_MMSeg/configs/_base_/models/dmnet_r50-d8.py
Normal file
52
Seg_All_In_One_MMSeg/configs/_base_/models/dmnet_r50-d8.py
Normal file
@@ -0,0 +1,52 @@
|
||||
# model settings
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
pretrained='open-mmlab://resnet50_v1c',
|
||||
backbone=dict(
|
||||
type='ResNetV1c',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
dilations=(1, 1, 2, 4),
|
||||
strides=(1, 2, 1, 1),
|
||||
norm_cfg=norm_cfg,
|
||||
norm_eval=False,
|
||||
style='pytorch',
|
||||
contract_dilation=True),
|
||||
decode_head=dict(
|
||||
type='DMHead',
|
||||
in_channels=2048,
|
||||
in_index=3,
|
||||
channels=512,
|
||||
filter_sizes=(1, 3, 5, 7),
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=dict(type='SyncBN', requires_grad=True),
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
auxiliary_head=dict(
|
||||
type='FCNHead',
|
||||
in_channels=1024,
|
||||
in_index=2,
|
||||
channels=256,
|
||||
num_convs=1,
|
||||
concat_input=False,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
54
Seg_All_In_One_MMSeg/configs/_base_/models/dnl_r50-d8.py
Normal file
54
Seg_All_In_One_MMSeg/configs/_base_/models/dnl_r50-d8.py
Normal file
@@ -0,0 +1,54 @@
|
||||
# model settings
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
pretrained='open-mmlab://resnet50_v1c',
|
||||
backbone=dict(
|
||||
type='ResNetV1c',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
dilations=(1, 1, 2, 4),
|
||||
strides=(1, 2, 1, 1),
|
||||
norm_cfg=norm_cfg,
|
||||
norm_eval=False,
|
||||
style='pytorch',
|
||||
contract_dilation=True),
|
||||
decode_head=dict(
|
||||
type='DNLHead',
|
||||
in_channels=2048,
|
||||
in_index=3,
|
||||
channels=512,
|
||||
dropout_ratio=0.1,
|
||||
reduction=2,
|
||||
use_scale=True,
|
||||
mode='embedded_gaussian',
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
auxiliary_head=dict(
|
||||
type='FCNHead',
|
||||
in_channels=1024,
|
||||
in_index=2,
|
||||
channels=256,
|
||||
num_convs=1,
|
||||
concat_input=False,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
39
Seg_All_In_One_MMSeg/configs/_base_/models/dpt_vit-b16.py
Normal file
39
Seg_All_In_One_MMSeg/configs/_base_/models/dpt_vit-b16.py
Normal file
@@ -0,0 +1,39 @@
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
pretrained='pretrain/vit-b16_p16_224-80ecf9dd.pth', # noqa
|
||||
backbone=dict(
|
||||
type='VisionTransformer',
|
||||
img_size=224,
|
||||
embed_dims=768,
|
||||
num_layers=12,
|
||||
num_heads=12,
|
||||
out_indices=(2, 5, 8, 11),
|
||||
final_norm=False,
|
||||
with_cls_token=True,
|
||||
output_cls_token=True),
|
||||
decode_head=dict(
|
||||
type='DPTHead',
|
||||
in_channels=(768, 768, 768, 768),
|
||||
channels=256,
|
||||
embed_dims=768,
|
||||
post_process_channels=[96, 192, 384, 768],
|
||||
num_classes=150,
|
||||
readout_type='project',
|
||||
input_transform='multiple_select',
|
||||
in_index=(0, 1, 2, 3),
|
||||
norm_cfg=norm_cfg,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
auxiliary_head=None,
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole')) # yapf: disable
|
||||
55
Seg_All_In_One_MMSeg/configs/_base_/models/emanet_r50-d8.py
Normal file
55
Seg_All_In_One_MMSeg/configs/_base_/models/emanet_r50-d8.py
Normal file
@@ -0,0 +1,55 @@
|
||||
# model settings
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
pretrained='open-mmlab://resnet50_v1c',
|
||||
backbone=dict(
|
||||
type='ResNetV1c',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
dilations=(1, 1, 2, 4),
|
||||
strides=(1, 2, 1, 1),
|
||||
norm_cfg=norm_cfg,
|
||||
norm_eval=False,
|
||||
style='pytorch',
|
||||
contract_dilation=True),
|
||||
decode_head=dict(
|
||||
type='EMAHead',
|
||||
in_channels=2048,
|
||||
in_index=3,
|
||||
channels=256,
|
||||
ema_channels=512,
|
||||
num_bases=64,
|
||||
num_stages=3,
|
||||
momentum=0.1,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
auxiliary_head=dict(
|
||||
type='FCNHead',
|
||||
in_channels=1024,
|
||||
in_index=2,
|
||||
channels=256,
|
||||
num_convs=1,
|
||||
concat_input=False,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
92
Seg_All_In_One_MMSeg/configs/_base_/models/en_bisenetv2.py
Normal file
92
Seg_All_In_One_MMSeg/configs/_base_/models/en_bisenetv2.py
Normal file
@@ -0,0 +1,92 @@
|
||||
# model settings
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
backbone=dict(
|
||||
type='EnBiSeNetV2',
|
||||
in_channels=3,
|
||||
detail_channels_stages=(64, 64, 128),
|
||||
semantic_channels_stages=(16, 32, 64, 128),
|
||||
semantic_expansion_ratio=6,
|
||||
bga_channels=128,
|
||||
out_indices=(0, 1, 2, 3, 4),
|
||||
init_cfg=None,
|
||||
align_corners=False),
|
||||
decode_head=dict(
|
||||
type='FCNHead',
|
||||
in_channels=128, # Should match bga_channels
|
||||
in_index=4, # The final output from backbone
|
||||
channels=1024,
|
||||
num_convs=1,
|
||||
concat_input=False,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
auxiliary_head=[
|
||||
dict(
|
||||
type='FCNHead',
|
||||
in_channels=128, # from detail branch
|
||||
in_index=0,
|
||||
channels=256,
|
||||
num_convs=2,
|
||||
concat_input=False,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
dict(
|
||||
type='FCNHead',
|
||||
in_channels=64, # from semantic branch stage 3
|
||||
in_index=1,
|
||||
channels=256,
|
||||
num_convs=2,
|
||||
concat_input=False,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
dict(
|
||||
type='FCNHead',
|
||||
in_channels=128, # from semantic branch stage 4
|
||||
in_index=2,
|
||||
channels=256,
|
||||
num_convs=2,
|
||||
concat_input=False,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
dict(
|
||||
type='FCNHead',
|
||||
in_channels=128, # from semantic fused output
|
||||
in_index=3,
|
||||
channels=256,
|
||||
num_convs=2,
|
||||
concat_input=False,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
],
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
56
Seg_All_In_One_MMSeg/configs/_base_/models/encnet_r50-d8.py
Normal file
56
Seg_All_In_One_MMSeg/configs/_base_/models/encnet_r50-d8.py
Normal file
@@ -0,0 +1,56 @@
|
||||
# model settings
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
pretrained='open-mmlab://resnet50_v1c',
|
||||
backbone=dict(
|
||||
type='ResNetV1c',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
dilations=(1, 1, 2, 4),
|
||||
strides=(1, 2, 1, 1),
|
||||
norm_cfg=norm_cfg,
|
||||
norm_eval=False,
|
||||
style='pytorch',
|
||||
contract_dilation=True),
|
||||
decode_head=dict(
|
||||
type='EncHead',
|
||||
in_channels=[512, 1024, 2048],
|
||||
in_index=(1, 2, 3),
|
||||
channels=512,
|
||||
num_codes=32,
|
||||
use_se_loss=True,
|
||||
add_lateral=False,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
|
||||
loss_se_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.2)),
|
||||
auxiliary_head=dict(
|
||||
type='FCNHead',
|
||||
in_channels=1024,
|
||||
in_index=2,
|
||||
channels=256,
|
||||
num_convs=1,
|
||||
concat_input=False,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
40
Seg_All_In_One_MMSeg/configs/_base_/models/erfnet_fcn.py
Normal file
40
Seg_All_In_One_MMSeg/configs/_base_/models/erfnet_fcn.py
Normal file
@@ -0,0 +1,40 @@
|
||||
# model settings
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
pretrained=None,
|
||||
backbone=dict(
|
||||
type='ERFNet',
|
||||
in_channels=3,
|
||||
enc_downsample_channels=(16, 64, 128),
|
||||
enc_stage_non_bottlenecks=(5, 8),
|
||||
enc_non_bottleneck_dilations=(2, 4, 8, 16),
|
||||
enc_non_bottleneck_channels=(64, 128),
|
||||
dec_upsample_channels=(64, 16),
|
||||
dec_stages_non_bottleneck=(2, 2),
|
||||
dec_non_bottleneck_channels=(64, 16),
|
||||
dropout_ratio=0.1,
|
||||
init_cfg=None),
|
||||
decode_head=dict(
|
||||
type='FCNHead',
|
||||
in_channels=16,
|
||||
channels=128,
|
||||
num_convs=1,
|
||||
concat_input=False,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
65
Seg_All_In_One_MMSeg/configs/_base_/models/fast_scnn.py
Normal file
65
Seg_All_In_One_MMSeg/configs/_base_/models/fast_scnn.py
Normal file
@@ -0,0 +1,65 @@
|
||||
# model settings
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True, momentum=0.01)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
backbone=dict(
|
||||
type='FastSCNN',
|
||||
downsample_dw_channels=(32, 48),
|
||||
global_in_channels=64,
|
||||
global_block_channels=(64, 96, 128),
|
||||
global_block_strides=(2, 2, 1),
|
||||
global_out_channels=128,
|
||||
higher_in_channels=64,
|
||||
lower_in_channels=128,
|
||||
fusion_out_channels=128,
|
||||
out_indices=(0, 1, 2),
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False),
|
||||
decode_head=dict(
|
||||
type='DepthwiseSeparableFCNHead',
|
||||
in_channels=128,
|
||||
channels=128,
|
||||
concat_input=False,
|
||||
num_classes=19,
|
||||
in_index=-1,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1)),
|
||||
auxiliary_head=[
|
||||
dict(
|
||||
type='FCNHead',
|
||||
in_channels=128,
|
||||
channels=32,
|
||||
num_convs=1,
|
||||
num_classes=19,
|
||||
in_index=-2,
|
||||
norm_cfg=norm_cfg,
|
||||
concat_input=False,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)),
|
||||
dict(
|
||||
type='FCNHead',
|
||||
in_channels=64,
|
||||
channels=32,
|
||||
num_convs=1,
|
||||
num_classes=19,
|
||||
in_index=-3,
|
||||
norm_cfg=norm_cfg,
|
||||
concat_input=False,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)),
|
||||
],
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
@@ -0,0 +1,61 @@
|
||||
# model settings
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
pretrained='open-mmlab://resnet50_v1c', # 'open-mmlab://resnet18_v1c', 'open-mmlab://resnet50_v1c' 'open-mmlab://resnet101_v1c'
|
||||
backbone=dict(
|
||||
type='ResNetV1c',
|
||||
depth=50, # 18, 50, 101,
|
||||
num_stages=4,
|
||||
dilations=(1, 1, 2, 4),
|
||||
strides=(1, 2, 2, 2),
|
||||
out_indices=(1, 2, 3),
|
||||
norm_cfg=norm_cfg,
|
||||
norm_eval=False,
|
||||
style='pytorch',
|
||||
contract_dilation=True),
|
||||
neck=dict(
|
||||
type='JPU',
|
||||
in_channels=(512, 1024, 2048), # (128, 256, 512), (512, 1024, 2048), (512, 1024, 2048)
|
||||
mid_channels=512, # 128, 512, 512
|
||||
start_level=0,
|
||||
end_level=-1,
|
||||
dilations=(1, 2, 4, 8),
|
||||
align_corners=False,
|
||||
norm_cfg=norm_cfg),
|
||||
decode_head=dict(
|
||||
type='PSPHead',
|
||||
in_channels=2048, # 512, 2048, 2048
|
||||
in_index=2,
|
||||
channels=512, # 128, 512, 512
|
||||
pool_scales=(1, 2, 3, 6),
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
auxiliary_head=dict(
|
||||
type='FCNHead',
|
||||
in_channels=1024, # 256, 1024, 1024
|
||||
in_index=1,
|
||||
channels=256, # 64, 256, 256
|
||||
num_convs=1,
|
||||
concat_input=False,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
60
Seg_All_In_One_MMSeg/configs/_base_/models/fcn_hr18.py
Normal file
60
Seg_All_In_One_MMSeg/configs/_base_/models/fcn_hr18.py
Normal file
@@ -0,0 +1,60 @@
|
||||
# model settings
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
pretrained='open-mmlab://msra/hrnetv2_w18',
|
||||
backbone=dict(
|
||||
type='HRNet',
|
||||
norm_cfg=norm_cfg,
|
||||
norm_eval=False,
|
||||
extra=dict(
|
||||
stage1=dict(
|
||||
num_modules=1,
|
||||
num_branches=1,
|
||||
block='BOTTLENECK',
|
||||
num_blocks=(4, ),
|
||||
num_channels=(64, )),
|
||||
stage2=dict(
|
||||
num_modules=1,
|
||||
num_branches=2,
|
||||
block='BASIC',
|
||||
num_blocks=(4, 4),
|
||||
num_channels=(18, 36)),
|
||||
stage3=dict(
|
||||
num_modules=4,
|
||||
num_branches=3,
|
||||
block='BASIC',
|
||||
num_blocks=(4, 4, 4),
|
||||
num_channels=(18, 36, 72)),
|
||||
stage4=dict(
|
||||
num_modules=3,
|
||||
num_branches=4,
|
||||
block='BASIC',
|
||||
num_blocks=(4, 4, 4, 4),
|
||||
num_channels=(18, 36, 72, 144)))),
|
||||
decode_head=dict(
|
||||
type='FCNHead',
|
||||
in_channels=[18, 36, 72, 144],
|
||||
in_index=(0, 1, 2, 3),
|
||||
channels=sum([18, 36, 72, 144]),
|
||||
input_transform='resize_concat',
|
||||
kernel_size=1,
|
||||
num_convs=1,
|
||||
concat_input=False,
|
||||
dropout_ratio=-1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
53
Seg_All_In_One_MMSeg/configs/_base_/models/fcn_r50-d8.py
Normal file
53
Seg_All_In_One_MMSeg/configs/_base_/models/fcn_r50-d8.py
Normal file
@@ -0,0 +1,53 @@
|
||||
# model settings
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
pretrained='open-mmlab://resnet50_v1c',
|
||||
backbone=dict(
|
||||
type='ResNetV1c',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
dilations=(1, 1, 2, 4),
|
||||
strides=(1, 2, 1, 1),
|
||||
norm_cfg=norm_cfg,
|
||||
norm_eval=False,
|
||||
style='pytorch',
|
||||
contract_dilation=True),
|
||||
decode_head=dict(
|
||||
type='FCNHead',
|
||||
in_channels=2048,
|
||||
in_index=3,
|
||||
channels=512,
|
||||
num_convs=2,
|
||||
concat_input=True,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
auxiliary_head=dict(
|
||||
type='FCNHead',
|
||||
in_channels=1024,
|
||||
in_index=2,
|
||||
channels=256,
|
||||
num_convs=1,
|
||||
concat_input=False,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
@@ -0,0 +1,59 @@
|
||||
# model settings
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
pretrained=None,
|
||||
backbone=dict(
|
||||
type='UNet',
|
||||
in_channels=3,
|
||||
base_channels=64,
|
||||
num_stages=5,
|
||||
strides=(1, 1, 1, 1, 1),
|
||||
enc_num_convs=(2, 2, 2, 2, 2),
|
||||
dec_num_convs=(2, 2, 2, 2),
|
||||
downsamples=(True, True, True, True),
|
||||
enc_dilations=(1, 1, 1, 1, 1),
|
||||
dec_dilations=(1, 1, 1, 1),
|
||||
with_cp=False,
|
||||
conv_cfg=None,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='ReLU'),
|
||||
upsample_cfg=dict(type='InterpConv'),
|
||||
norm_eval=False),
|
||||
decode_head=dict(
|
||||
type='FCNHead',
|
||||
in_channels=64,
|
||||
in_index=4,
|
||||
channels=64,
|
||||
num_convs=1,
|
||||
concat_input=False,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=2,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
auxiliary_head=dict(
|
||||
type='FCNHead',
|
||||
in_channels=128,
|
||||
in_index=3,
|
||||
channels=64,
|
||||
num_convs=1,
|
||||
concat_input=False,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=2,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='slide', crop_size=256, stride=170))
|
||||
@@ -0,0 +1,54 @@
|
||||
# model settings
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/poolformer/poolformer-s12_3rdparty_32xb128_in1k_20220414-f8d83051.pth' # noqa
|
||||
# TODO: delete custom_imports after mmpretrain supports auto import
|
||||
# please install mmpretrain >= 1.0.0rc7
|
||||
# import mmpretrain.models to trigger register_module in mmpretrain
|
||||
custom_imports = dict(
|
||||
imports=['mmpretrain.models'], allow_failed_imports=False)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
backbone=dict(
|
||||
type='mmpretrain.PoolFormer',
|
||||
arch='s12',
|
||||
init_cfg=dict(
|
||||
type='Pretrained', checkpoint=checkpoint_file, prefix='backbone.'),
|
||||
in_patch_size=7,
|
||||
in_stride=4,
|
||||
in_pad=2,
|
||||
down_patch_size=3,
|
||||
down_stride=2,
|
||||
down_pad=1,
|
||||
drop_rate=0.,
|
||||
drop_path_rate=0.,
|
||||
out_indices=(0, 2, 4, 6),
|
||||
frozen_stages=0,
|
||||
),
|
||||
neck=dict(
|
||||
type='FPN',
|
||||
in_channels=[256, 512, 1024, 2048],
|
||||
out_channels=256,
|
||||
num_outs=4),
|
||||
decode_head=dict(
|
||||
type='FPNHead',
|
||||
in_channels=[256, 256, 256, 256],
|
||||
in_index=[0, 1, 2, 3],
|
||||
feature_strides=[4, 8, 16, 32],
|
||||
channels=128,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
44
Seg_All_In_One_MMSeg/configs/_base_/models/fpn_r50.py
Normal file
44
Seg_All_In_One_MMSeg/configs/_base_/models/fpn_r50.py
Normal file
@@ -0,0 +1,44 @@
|
||||
# model settings
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
pretrained='open-mmlab://resnet50_v1c',
|
||||
backbone=dict(
|
||||
type='ResNetV1c',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
dilations=(1, 1, 1, 1),
|
||||
strides=(1, 2, 2, 2),
|
||||
norm_cfg=norm_cfg,
|
||||
norm_eval=False,
|
||||
style='pytorch',
|
||||
contract_dilation=True),
|
||||
neck=dict(
|
||||
type='FPN',
|
||||
in_channels=[256, 512, 1024, 2048],
|
||||
out_channels=256,
|
||||
num_outs=4),
|
||||
decode_head=dict(
|
||||
type='FPNHead',
|
||||
in_channels=[256, 256, 256, 256],
|
||||
in_index=[0, 1, 2, 3],
|
||||
feature_strides=[4, 8, 16, 32],
|
||||
channels=128,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
54
Seg_All_In_One_MMSeg/configs/_base_/models/gcnet_r50-d8.py
Normal file
54
Seg_All_In_One_MMSeg/configs/_base_/models/gcnet_r50-d8.py
Normal file
@@ -0,0 +1,54 @@
|
||||
# model settings
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
pretrained='open-mmlab://resnet50_v1c',
|
||||
backbone=dict(
|
||||
type='ResNetV1c',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
dilations=(1, 1, 2, 4),
|
||||
strides=(1, 2, 1, 1),
|
||||
norm_cfg=norm_cfg,
|
||||
norm_eval=False,
|
||||
style='pytorch',
|
||||
contract_dilation=True),
|
||||
decode_head=dict(
|
||||
type='GCHead',
|
||||
in_channels=2048,
|
||||
in_index=3,
|
||||
channels=512,
|
||||
ratio=1 / 4.,
|
||||
pooling_type='att',
|
||||
fusion_types=('channel_add', ),
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
auxiliary_head=dict(
|
||||
type='FCNHead',
|
||||
in_channels=1024,
|
||||
in_index=2,
|
||||
channels=256,
|
||||
num_convs=1,
|
||||
concat_input=False,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
82
Seg_All_In_One_MMSeg/configs/_base_/models/icnet_r50-d8.py
Normal file
82
Seg_All_In_One_MMSeg/configs/_base_/models/icnet_r50-d8.py
Normal file
@@ -0,0 +1,82 @@
|
||||
# model settings
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
backbone=dict(
|
||||
type='ICNet',
|
||||
backbone_cfg=dict(
|
||||
type='ResNetV1c',
|
||||
in_channels=3,
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
dilations=(1, 1, 2, 4),
|
||||
strides=(1, 2, 1, 1),
|
||||
norm_cfg=norm_cfg,
|
||||
norm_eval=False,
|
||||
style='pytorch',
|
||||
contract_dilation=True),
|
||||
in_channels=3,
|
||||
layer_channels=(512, 2048),
|
||||
light_branch_middle_channels=32,
|
||||
psp_out_channels=512,
|
||||
out_channels=(64, 256, 256),
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
),
|
||||
neck=dict(
|
||||
type='ICNeck',
|
||||
in_channels=(64, 256, 256),
|
||||
out_channels=128,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False),
|
||||
decode_head=dict(
|
||||
type='FCNHead',
|
||||
in_channels=128,
|
||||
channels=128,
|
||||
num_convs=1,
|
||||
in_index=2,
|
||||
dropout_ratio=0,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
concat_input=False,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
auxiliary_head=[
|
||||
dict(
|
||||
type='FCNHead',
|
||||
in_channels=128,
|
||||
channels=128,
|
||||
num_convs=1,
|
||||
num_classes=19,
|
||||
in_index=0,
|
||||
norm_cfg=norm_cfg,
|
||||
concat_input=False,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||
dict(
|
||||
type='FCNHead',
|
||||
in_channels=128,
|
||||
channels=128,
|
||||
num_convs=1,
|
||||
num_classes=19,
|
||||
in_index=1,
|
||||
norm_cfg=norm_cfg,
|
||||
concat_input=False,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||
],
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
53
Seg_All_In_One_MMSeg/configs/_base_/models/isanet_r50-d8.py
Normal file
53
Seg_All_In_One_MMSeg/configs/_base_/models/isanet_r50-d8.py
Normal file
@@ -0,0 +1,53 @@
|
||||
# model settings
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
pretrained='open-mmlab://resnet50_v1c',
|
||||
backbone=dict(
|
||||
type='ResNetV1c',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
dilations=(1, 1, 2, 4),
|
||||
strides=(1, 2, 1, 1),
|
||||
norm_cfg=norm_cfg,
|
||||
norm_eval=False,
|
||||
style='pytorch',
|
||||
contract_dilation=True),
|
||||
decode_head=dict(
|
||||
type='ISAHead',
|
||||
in_channels=2048,
|
||||
in_index=3,
|
||||
channels=512,
|
||||
isa_channels=256,
|
||||
down_factor=(8, 8),
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
auxiliary_head=dict(
|
||||
type='FCNHead',
|
||||
in_channels=1024,
|
||||
in_index=2,
|
||||
channels=256,
|
||||
num_convs=1,
|
||||
concat_input=False,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
83
Seg_All_In_One_MMSeg/configs/_base_/models/knet_r50-d8_my.py
Normal file
83
Seg_All_In_One_MMSeg/configs/_base_/models/knet_r50-d8_my.py
Normal file
@@ -0,0 +1,83 @@
|
||||
# model settings
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
# model settings
|
||||
num_stages = 3
|
||||
conv_kernel_size = 1
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
pretrained='open-mmlab://resnet50_v1c',
|
||||
backbone=dict(
|
||||
type='ResNetV1c',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
dilations=(1, 1, 2, 4),
|
||||
strides=(1, 2, 1, 1),
|
||||
norm_cfg=norm_cfg,
|
||||
norm_eval=False,
|
||||
style='pytorch',
|
||||
contract_dilation=True),
|
||||
decode_head=dict(
|
||||
type='IterativeDecodeHead',
|
||||
num_stages=num_stages,
|
||||
kernel_update_head=[
|
||||
dict(
|
||||
type='KernelUpdateHead',
|
||||
num_classes=150,
|
||||
num_ffn_fcs=2,
|
||||
num_heads=8,
|
||||
num_mask_fcs=1,
|
||||
feedforward_channels=2048,
|
||||
in_channels=512,
|
||||
out_channels=512,
|
||||
dropout=0.0,
|
||||
conv_kernel_size=conv_kernel_size,
|
||||
ffn_act_cfg=dict(type='ReLU', inplace=True),
|
||||
with_ffn=True,
|
||||
feat_transform_cfg=dict(
|
||||
conv_cfg=dict(type='Conv2d'), act_cfg=None),
|
||||
kernel_updator_cfg=dict(
|
||||
type='KernelUpdator',
|
||||
in_channels=256,
|
||||
feat_channels=256,
|
||||
out_channels=256,
|
||||
act_cfg=dict(type='ReLU', inplace=True),
|
||||
norm_cfg=dict(type='LN'))) for _ in range(num_stages)
|
||||
],
|
||||
kernel_generate_head=dict(
|
||||
type='ASPPHead',
|
||||
# in_channels=2048,
|
||||
# in_index=3,
|
||||
# channels=512,
|
||||
# dilations=(1, 12, 24, 36),
|
||||
# dropout_ratio=0.1,
|
||||
num_classes=150,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))),
|
||||
auxiliary_head=dict(
|
||||
type='FCNHead',
|
||||
in_channels=1024,
|
||||
in_index=2,
|
||||
channels=256,
|
||||
num_convs=1,
|
||||
concat_input=False,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=150,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
|
||||
33
Seg_All_In_One_MMSeg/configs/_base_/models/lraspp_m-v3-d8.py
Normal file
33
Seg_All_In_One_MMSeg/configs/_base_/models/lraspp_m-v3-d8.py
Normal file
@@ -0,0 +1,33 @@
|
||||
# model settings
|
||||
norm_cfg = dict(type='SyncBN', eps=0.001, requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
backbone=dict(
|
||||
type='MobileNetV3',
|
||||
arch='large',
|
||||
out_indices=(1, 3, 16),
|
||||
norm_cfg=norm_cfg),
|
||||
decode_head=dict(
|
||||
type='LRASPPHead',
|
||||
in_channels=(16, 24, 960),
|
||||
in_index=(0, 1, 2),
|
||||
channels=128,
|
||||
input_transform='multiple_select',
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='ReLU'),
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
136
Seg_All_In_One_MMSeg/configs/_base_/models/mask2former_my.py
Normal file
136
Seg_All_In_One_MMSeg/configs/_base_/models/mask2former_my.py
Normal file
@@ -0,0 +1,136 @@
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
backbone=dict(
|
||||
type='SwinTransformer',
|
||||
# pretrain_img_size=384,
|
||||
# embed_dims=128,
|
||||
# depths=[2, 2, 18, 2],
|
||||
# num_heads=[4, 8, 16, 32],
|
||||
# window_size=12,
|
||||
# mlp_ratio=4,
|
||||
# qkv_bias=True,
|
||||
# qk_scale=None,
|
||||
# drop_rate=0.,
|
||||
# attn_drop_rate=0.,
|
||||
# drop_path_rate=0.3,
|
||||
# patch_norm=True,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
# with_cp=False,
|
||||
frozen_stages=-1,
|
||||
init_cfg=dict(type='Pretrained', checkpoint='https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_base_patch4_window12_384_20220317-55b0104a.pth')),
|
||||
decode_head=dict(
|
||||
type='Mask2FormerHead',
|
||||
in_channels=[128, 256, 512, 1024], # TODO
|
||||
strides=[4, 8, 16, 32],
|
||||
feat_channels=256,
|
||||
out_channels=256,
|
||||
num_classes=150, # TODO
|
||||
num_queries=100,
|
||||
num_transformer_feat_level=3,
|
||||
align_corners=False,
|
||||
pixel_decoder=dict(
|
||||
type='mmdet.MSDeformAttnPixelDecoder',
|
||||
num_outs=3,
|
||||
norm_cfg=dict(type='GN', num_groups=32),
|
||||
act_cfg=dict(type='ReLU'),
|
||||
encoder=dict( # DeformableDetrTransformerEncoder
|
||||
num_layers=6,
|
||||
layer_cfg=dict( # DeformableDetrTransformerEncoderLayer
|
||||
self_attn_cfg=dict( # MultiScaleDeformableAttention
|
||||
embed_dims=256,
|
||||
num_heads=8,
|
||||
num_levels=3,
|
||||
num_points=4,
|
||||
im2col_step=64,
|
||||
dropout=0.0,
|
||||
batch_first=True,
|
||||
norm_cfg=None,
|
||||
init_cfg=None),
|
||||
ffn_cfg=dict(
|
||||
embed_dims=256,
|
||||
feedforward_channels=1024,
|
||||
num_fcs=2,
|
||||
ffn_drop=0.0,
|
||||
act_cfg=dict(type='ReLU', inplace=True))),
|
||||
init_cfg=None),
|
||||
positional_encoding=dict( # SinePositionalEncoding
|
||||
num_feats=128, normalize=True),
|
||||
init_cfg=None),
|
||||
enforce_decoder_input_project=False,
|
||||
positional_encoding=dict( # SinePositionalEncoding
|
||||
num_feats=128, normalize=True),
|
||||
transformer_decoder=dict( # Mask2FormerTransformerDecoder
|
||||
return_intermediate=True,
|
||||
num_layers=9,
|
||||
layer_cfg=dict( # Mask2FormerTransformerDecoderLayer
|
||||
self_attn_cfg=dict( # MultiheadAttention
|
||||
embed_dims=256,
|
||||
num_heads=8,
|
||||
attn_drop=0.0,
|
||||
proj_drop=0.0,
|
||||
dropout_layer=None,
|
||||
batch_first=True),
|
||||
cross_attn_cfg=dict( # MultiheadAttention
|
||||
embed_dims=256,
|
||||
num_heads=8,
|
||||
attn_drop=0.0,
|
||||
proj_drop=0.0,
|
||||
dropout_layer=None,
|
||||
batch_first=True),
|
||||
ffn_cfg=dict(
|
||||
embed_dims=256,
|
||||
feedforward_channels=2048,
|
||||
num_fcs=2,
|
||||
act_cfg=dict(type='ReLU', inplace=True),
|
||||
ffn_drop=0.0,
|
||||
dropout_layer=None,
|
||||
add_identity=True)),
|
||||
init_cfg=None),
|
||||
loss_cls=dict(
|
||||
type='mmdet.CrossEntropyLoss',
|
||||
use_sigmoid=False,
|
||||
loss_weight=2.0,
|
||||
reduction='mean',
|
||||
class_weight=[1.0] * 150 + [0.1]), # TODO
|
||||
loss_mask=dict(
|
||||
type='mmdet.CrossEntropyLoss',
|
||||
use_sigmoid=True,
|
||||
reduction='mean',
|
||||
loss_weight=5.0),
|
||||
loss_dice=dict(
|
||||
type='mmdet.DiceLoss',
|
||||
use_sigmoid=True,
|
||||
activate=True,
|
||||
reduction='mean',
|
||||
naive_dice=True,
|
||||
eps=1.0,
|
||||
loss_weight=5.0),
|
||||
train_cfg=dict(
|
||||
num_points=12544,
|
||||
oversample_ratio=3.0,
|
||||
importance_sample_ratio=0.75,
|
||||
assigner=dict(
|
||||
type='mmdet.HungarianAssigner',
|
||||
match_costs=[
|
||||
dict(type='mmdet.ClassificationCost', weight=2.0),
|
||||
dict(
|
||||
type='mmdet.CrossEntropyLossCost',
|
||||
weight=5.0,
|
||||
use_sigmoid=True),
|
||||
dict(
|
||||
type='mmdet.DiceCost',
|
||||
weight=5.0,
|
||||
pred_act=True,
|
||||
eps=1.0)
|
||||
]),
|
||||
sampler=dict(type='mmdet.MaskPseudoSampler'))),
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
108
Seg_All_In_One_MMSeg/configs/_base_/models/maskformer_my.py
Normal file
108
Seg_All_In_One_MMSeg/configs/_base_/models/maskformer_my.py
Normal file
@@ -0,0 +1,108 @@
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
# size=crop_size,
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
# model_cfg
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
backbone=dict(
|
||||
type='ResNet',
|
||||
# depth=50,
|
||||
# num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
# dilations=(1, 1, 1, 1),
|
||||
# strides=(1, 2, 2, 2),
|
||||
# norm_cfg=norm_cfg,
|
||||
# norm_eval=True,
|
||||
# style='pytorch',
|
||||
# contract_dilation=True,
|
||||
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
|
||||
decode_head=dict(
|
||||
type='MaskFormerHead',
|
||||
in_channels=[256, 512, 1024,
|
||||
2048], # input channels of pixel_decoder modules
|
||||
feat_channels=256,
|
||||
in_index=[0, 1, 2, 3],
|
||||
num_classes=150, # TODO
|
||||
out_channels=256,
|
||||
num_queries=100,
|
||||
pixel_decoder=dict(
|
||||
type='mmdet.PixelDecoder',
|
||||
norm_cfg=dict(type='GN', num_groups=32),
|
||||
act_cfg=dict(type='ReLU')),
|
||||
enforce_decoder_input_project=False,
|
||||
positional_encoding=dict( # SinePositionalEncoding
|
||||
num_feats=128, normalize=True),
|
||||
transformer_decoder=dict( # DetrTransformerDecoder
|
||||
return_intermediate=True,
|
||||
num_layers=6,
|
||||
layer_cfg=dict( # DetrTransformerDecoderLayer
|
||||
self_attn_cfg=dict( # MultiheadAttention
|
||||
embed_dims=256,
|
||||
num_heads=8,
|
||||
attn_drop=0.1,
|
||||
proj_drop=0.1,
|
||||
dropout_layer=None,
|
||||
batch_first=True),
|
||||
cross_attn_cfg=dict( # MultiheadAttention
|
||||
embed_dims=256,
|
||||
num_heads=8,
|
||||
attn_drop=0.1,
|
||||
proj_drop=0.1,
|
||||
dropout_layer=None,
|
||||
batch_first=True),
|
||||
ffn_cfg=dict(
|
||||
embed_dims=256,
|
||||
feedforward_channels=2048,
|
||||
num_fcs=2,
|
||||
act_cfg=dict(type='ReLU', inplace=True),
|
||||
ffn_drop=0.1,
|
||||
dropout_layer=None,
|
||||
add_identity=True)),
|
||||
init_cfg=None),
|
||||
loss_cls=dict(
|
||||
type='mmdet.CrossEntropyLoss',
|
||||
use_sigmoid=False,
|
||||
loss_weight=1.0,
|
||||
reduction='mean',
|
||||
class_weight=[1.0] * 150 + [0.1]), # TODO
|
||||
loss_mask=dict(
|
||||
type='mmdet.FocalLoss',
|
||||
use_sigmoid=True,
|
||||
gamma=2.0,
|
||||
alpha=0.25,
|
||||
reduction='mean',
|
||||
loss_weight=20.0),
|
||||
loss_dice=dict(
|
||||
type='mmdet.DiceLoss',
|
||||
use_sigmoid=True,
|
||||
activate=True,
|
||||
reduction='mean',
|
||||
naive_dice=True,
|
||||
eps=1.0,
|
||||
loss_weight=1.0),
|
||||
train_cfg=dict(
|
||||
assigner=dict(
|
||||
type='mmdet.HungarianAssigner',
|
||||
match_costs=[
|
||||
dict(type='mmdet.ClassificationCost', weight=1.0),
|
||||
dict(
|
||||
type='mmdet.FocalLossCost',
|
||||
weight=20.0,
|
||||
binary_input=True),
|
||||
dict(
|
||||
type='mmdet.DiceCost',
|
||||
weight=1.0,
|
||||
pred_act=True,
|
||||
eps=1.0)
|
||||
]),
|
||||
sampler=dict(type='mmdet.MaskPseudoSampler'))),
|
||||
# training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'),
|
||||
)
|
||||
@@ -0,0 +1,88 @@
|
||||
# model settings
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
pretrained=None,
|
||||
backbone=dict(
|
||||
type='My_BiSeNetV2_A1',
|
||||
detail_channels=(64, 64, 128),
|
||||
semantic_channels=(16, 32, 64, 128),
|
||||
semantic_expansion_ratio=6,
|
||||
bga_channels=128,
|
||||
out_indices=(0, 1, 2, 3, 4),
|
||||
init_cfg=None,
|
||||
align_corners=False),
|
||||
decode_head=dict(
|
||||
type='FCNHead',
|
||||
in_channels=128,
|
||||
in_index=0,
|
||||
channels=1024,
|
||||
num_convs=1,
|
||||
concat_input=False,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
auxiliary_head=[
|
||||
dict(
|
||||
type='FCNHead',
|
||||
in_channels=16,
|
||||
channels=16,
|
||||
num_convs=2,
|
||||
num_classes=19,
|
||||
in_index=1,
|
||||
norm_cfg=norm_cfg,
|
||||
concat_input=False,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
dict(
|
||||
type='FCNHead',
|
||||
in_channels=32,
|
||||
channels=64,
|
||||
num_convs=2,
|
||||
num_classes=19,
|
||||
in_index=2,
|
||||
norm_cfg=norm_cfg,
|
||||
concat_input=False,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
dict(
|
||||
type='FCNHead',
|
||||
in_channels=64,
|
||||
channels=256,
|
||||
num_convs=2,
|
||||
num_classes=19,
|
||||
in_index=3,
|
||||
norm_cfg=norm_cfg,
|
||||
concat_input=False,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
dict(
|
||||
type='FCNHead',
|
||||
in_channels=128,
|
||||
channels=1024,
|
||||
num_convs=2,
|
||||
num_classes=19,
|
||||
in_index=4,
|
||||
norm_cfg=norm_cfg,
|
||||
concat_input=False,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
],
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
@@ -0,0 +1,88 @@
|
||||
# model settings
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
pretrained=None,
|
||||
backbone=dict(
|
||||
type='My_BiSeNetV2_A2',
|
||||
detail_channels=(64, 64, 128),
|
||||
semantic_channels=(16, 32, 64, 128),
|
||||
semantic_expansion_ratio=6,
|
||||
bga_channels=128,
|
||||
out_indices=(0, 1, 2, 3, 4),
|
||||
init_cfg=None,
|
||||
align_corners=False),
|
||||
decode_head=dict(
|
||||
type='FCNHead',
|
||||
in_channels=128,
|
||||
in_index=0,
|
||||
channels=1024,
|
||||
num_convs=1,
|
||||
concat_input=False,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
auxiliary_head=[
|
||||
dict(
|
||||
type='FCNHead',
|
||||
in_channels=16,
|
||||
channels=16,
|
||||
num_convs=2,
|
||||
num_classes=19,
|
||||
in_index=1,
|
||||
norm_cfg=norm_cfg,
|
||||
concat_input=False,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
dict(
|
||||
type='FCNHead',
|
||||
in_channels=32,
|
||||
channels=64,
|
||||
num_convs=2,
|
||||
num_classes=19,
|
||||
in_index=2,
|
||||
norm_cfg=norm_cfg,
|
||||
concat_input=False,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
dict(
|
||||
type='FCNHead',
|
||||
in_channels=64,
|
||||
channels=256,
|
||||
num_convs=2,
|
||||
num_classes=19,
|
||||
in_index=3,
|
||||
norm_cfg=norm_cfg,
|
||||
concat_input=False,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
dict(
|
||||
type='FCNHead',
|
||||
in_channels=128,
|
||||
channels=1024,
|
||||
num_convs=2,
|
||||
num_classes=19,
|
||||
in_index=4,
|
||||
norm_cfg=norm_cfg,
|
||||
concat_input=False,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
],
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
@@ -0,0 +1,54 @@
|
||||
# model settings
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
pretrained='open-mmlab://resnet50_v1c',
|
||||
backbone=dict(
|
||||
type='ResNetV1c',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
dilations=(1, 1, 2, 4),
|
||||
strides=(1, 2, 1, 1),
|
||||
norm_cfg=norm_cfg,
|
||||
norm_eval=False,
|
||||
style='pytorch',
|
||||
contract_dilation=True),
|
||||
decode_head=dict(
|
||||
type='NLHead',
|
||||
in_channels=2048,
|
||||
in_index=3,
|
||||
channels=512,
|
||||
dropout_ratio=0.1,
|
||||
reduction=2,
|
||||
use_scale=True,
|
||||
mode='embedded_gaussian',
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
auxiliary_head=dict(
|
||||
type='FCNHead',
|
||||
in_channels=1024,
|
||||
in_index=2,
|
||||
channels=256,
|
||||
num_convs=1,
|
||||
concat_input=False,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
76
Seg_All_In_One_MMSeg/configs/_base_/models/ocrnet_hr18.py
Normal file
76
Seg_All_In_One_MMSeg/configs/_base_/models/ocrnet_hr18.py
Normal file
@@ -0,0 +1,76 @@
|
||||
# model settings
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='CascadeEncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
num_stages=2,
|
||||
pretrained='open-mmlab://msra/hrnetv2_w18',
|
||||
backbone=dict(
|
||||
type='HRNet',
|
||||
norm_cfg=norm_cfg,
|
||||
norm_eval=False,
|
||||
extra=dict(
|
||||
stage1=dict(
|
||||
num_modules=1,
|
||||
num_branches=1,
|
||||
block='BOTTLENECK',
|
||||
num_blocks=(4, ),
|
||||
num_channels=(64, )),
|
||||
stage2=dict(
|
||||
num_modules=1,
|
||||
num_branches=2,
|
||||
block='BASIC',
|
||||
num_blocks=(4, 4),
|
||||
num_channels=(18, 36)),
|
||||
stage3=dict(
|
||||
num_modules=4,
|
||||
num_branches=3,
|
||||
block='BASIC',
|
||||
num_blocks=(4, 4, 4),
|
||||
num_channels=(18, 36, 72)),
|
||||
stage4=dict(
|
||||
num_modules=3,
|
||||
num_branches=4,
|
||||
block='BASIC',
|
||||
num_blocks=(4, 4, 4, 4),
|
||||
num_channels=(18, 36, 72, 144)))),
|
||||
decode_head=[
|
||||
dict(
|
||||
type='FCNHead',
|
||||
in_channels=[18, 36, 72, 144],
|
||||
channels=sum([18, 36, 72, 144]),
|
||||
in_index=(0, 1, 2, 3),
|
||||
input_transform='resize_concat',
|
||||
kernel_size=1,
|
||||
num_convs=1,
|
||||
concat_input=False,
|
||||
dropout_ratio=-1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||
dict(
|
||||
type='OCRHead',
|
||||
in_channels=[18, 36, 72, 144],
|
||||
in_index=(0, 1, 2, 3),
|
||||
input_transform='resize_concat',
|
||||
channels=512,
|
||||
ocr_channels=256,
|
||||
dropout_ratio=-1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
],
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
55
Seg_All_In_One_MMSeg/configs/_base_/models/ocrnet_r50-d8.py
Normal file
55
Seg_All_In_One_MMSeg/configs/_base_/models/ocrnet_r50-d8.py
Normal file
@@ -0,0 +1,55 @@
|
||||
# model settings
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='CascadeEncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
num_stages=2,
|
||||
pretrained='open-mmlab://resnet50_v1c',
|
||||
backbone=dict(
|
||||
type='ResNetV1c',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
dilations=(1, 1, 2, 4),
|
||||
strides=(1, 2, 1, 1),
|
||||
norm_cfg=norm_cfg,
|
||||
norm_eval=False,
|
||||
style='pytorch',
|
||||
contract_dilation=True),
|
||||
decode_head=[
|
||||
dict(
|
||||
type='FCNHead',
|
||||
in_channels=1024,
|
||||
in_index=2,
|
||||
channels=256,
|
||||
num_convs=1,
|
||||
concat_input=False,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||
dict(
|
||||
type='OCRHead',
|
||||
in_channels=2048,
|
||||
in_index=3,
|
||||
channels=512,
|
||||
ocr_channels=256,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))
|
||||
],
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
68
Seg_All_In_One_MMSeg/configs/_base_/models/pidnet.py
Normal file
68
Seg_All_In_One_MMSeg/configs/_base_/models/pidnet.py
Normal file
@@ -0,0 +1,68 @@
|
||||
# The class_weight is borrowed from https://github.com/openseg-group/OCNet.pytorch/issues/14 # noqa
|
||||
# Licensed under the MIT License
|
||||
# 无侧重
|
||||
# class_weight = [
|
||||
# 0.8373, 0.918, 0.866, 1.0345, 1.0166, 0.9969, 0.9754, 1.0489, 0.8786, 1.0023,
|
||||
# 0.9539, 0.9843, 1.1116, 0.9037, 1.0865, 1.0955, 1.0865, 1.1529, 1.0507
|
||||
# ]
|
||||
# class_weight = [
|
||||
# 0.8373, 0.918, 0.866, 1.0345, 1.0166, 0.9969, 0.9754, 1.0489, 0.8786, 1.0023,
|
||||
# 0.9539, 0.9843, 1.1116
|
||||
# ]
|
||||
|
||||
|
||||
checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/pidnet/pidnet-s_imagenet1k_20230306-715e6273.pth' # noqa
|
||||
# crop_size = (1024, 1024)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255,
|
||||
size=(1024, 1024))
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
backbone=dict(
|
||||
type='PIDNet',
|
||||
in_channels=3,
|
||||
channels=32,
|
||||
ppm_channels=96,
|
||||
num_stem_blocks=2,
|
||||
num_branch_blocks=3,
|
||||
align_corners=False,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='ReLU', inplace=True),
|
||||
init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file)),
|
||||
decode_head=dict(
|
||||
type='PIDHead',
|
||||
in_channels=128,
|
||||
channels=128,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='ReLU', inplace=True),
|
||||
align_corners=True,
|
||||
loss_decode=[
|
||||
dict(
|
||||
type='CrossEntropyLoss',
|
||||
use_sigmoid=False,
|
||||
# class_weight=class_weight,
|
||||
loss_weight=0.4),
|
||||
dict(
|
||||
type='OhemCrossEntropy',
|
||||
thres=0.9,
|
||||
min_kept=131072,
|
||||
# class_weight=class_weight,
|
||||
loss_weight=1.0),
|
||||
dict(type='BoundaryLoss', loss_weight=20.0),
|
||||
dict(
|
||||
type='OhemCrossEntropy',
|
||||
thres=0.9,
|
||||
min_kept=131072,
|
||||
# class_weight=class_weight,
|
||||
loss_weight=1.0)
|
||||
]),
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
64
Seg_All_In_One_MMSeg/configs/_base_/models/pointrend_r50.py
Normal file
64
Seg_All_In_One_MMSeg/configs/_base_/models/pointrend_r50.py
Normal file
@@ -0,0 +1,64 @@
|
||||
# model settings
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='CascadeEncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
num_stages=2,
|
||||
pretrained='open-mmlab://resnet50_v1c',
|
||||
backbone=dict(
|
||||
type='ResNetV1c',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
dilations=(1, 1, 1, 1),
|
||||
strides=(1, 2, 2, 2),
|
||||
norm_cfg=norm_cfg,
|
||||
norm_eval=False,
|
||||
style='pytorch',
|
||||
contract_dilation=True),
|
||||
neck=dict(
|
||||
type='FPN',
|
||||
in_channels=[256, 512, 1024, 2048],
|
||||
out_channels=256,
|
||||
num_outs=4),
|
||||
decode_head=[
|
||||
dict(
|
||||
type='FPNHead',
|
||||
in_channels=[256, 256, 256, 256],
|
||||
in_index=[0, 1, 2, 3],
|
||||
feature_strides=[4, 8, 16, 32],
|
||||
channels=128,
|
||||
dropout_ratio=-1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
dict(
|
||||
type='PointHead',
|
||||
in_channels=[256],
|
||||
in_index=[0],
|
||||
channels=256,
|
||||
num_fcs=3,
|
||||
coarse_pred_each_layer=True,
|
||||
dropout_ratio=-1,
|
||||
num_classes=19,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))
|
||||
],
|
||||
# model training and testing settings
|
||||
train_cfg=dict(
|
||||
num_points=2048, oversample_ratio=3, importance_sample_ratio=0.75),
|
||||
test_cfg=dict(
|
||||
mode='whole',
|
||||
subdivision_steps=2,
|
||||
subdivision_num_points=8196,
|
||||
scale_factor=2))
|
||||
57
Seg_All_In_One_MMSeg/configs/_base_/models/psanet_r50-d8.py
Normal file
57
Seg_All_In_One_MMSeg/configs/_base_/models/psanet_r50-d8.py
Normal file
@@ -0,0 +1,57 @@
|
||||
# model settings
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
pretrained='open-mmlab://resnet50_v1c',
|
||||
backbone=dict(
|
||||
type='ResNetV1c',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
dilations=(1, 1, 2, 4),
|
||||
strides=(1, 2, 1, 1),
|
||||
norm_cfg=norm_cfg,
|
||||
norm_eval=False,
|
||||
style='pytorch',
|
||||
contract_dilation=True),
|
||||
decode_head=dict(
|
||||
type='PSAHead',
|
||||
in_channels=2048,
|
||||
in_index=3,
|
||||
channels=512,
|
||||
mask_size=(97, 97),
|
||||
psa_type='bi-direction',
|
||||
compact=False,
|
||||
shrink_factor=2,
|
||||
normalization_factor=1.0,
|
||||
psa_softmax=True,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
auxiliary_head=dict(
|
||||
type='FCNHead',
|
||||
in_channels=1024,
|
||||
in_index=2,
|
||||
channels=256,
|
||||
num_convs=1,
|
||||
concat_input=False,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
52
Seg_All_In_One_MMSeg/configs/_base_/models/pspnet_r50-d8.py
Normal file
52
Seg_All_In_One_MMSeg/configs/_base_/models/pspnet_r50-d8.py
Normal file
@@ -0,0 +1,52 @@
|
||||
# model settings
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
pretrained='open-mmlab://resnet50_v1c',
|
||||
backbone=dict(
|
||||
type='ResNetV1c',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
dilations=(1, 1, 2, 4),
|
||||
strides=(1, 2, 1, 1),
|
||||
norm_cfg=norm_cfg,
|
||||
norm_eval=False,
|
||||
style='pytorch',
|
||||
contract_dilation=True),
|
||||
decode_head=dict(
|
||||
type='PSPHead',
|
||||
in_channels=2048,
|
||||
in_index=3,
|
||||
channels=512,
|
||||
pool_scales=(1, 2, 3, 6),
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
auxiliary_head=dict(
|
||||
type='FCNHead',
|
||||
in_channels=1024,
|
||||
in_index=2,
|
||||
channels=256,
|
||||
num_convs=1,
|
||||
concat_input=False,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
@@ -0,0 +1,58 @@
|
||||
# model settings
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
pretrained=None,
|
||||
backbone=dict(
|
||||
type='UNet',
|
||||
in_channels=3,
|
||||
base_channels=64,
|
||||
num_stages=5,
|
||||
strides=(1, 1, 1, 1, 1),
|
||||
enc_num_convs=(2, 2, 2, 2, 2),
|
||||
dec_num_convs=(2, 2, 2, 2),
|
||||
downsamples=(True, True, True, True),
|
||||
enc_dilations=(1, 1, 1, 1, 1),
|
||||
dec_dilations=(1, 1, 1, 1),
|
||||
with_cp=False,
|
||||
conv_cfg=None,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='ReLU'),
|
||||
upsample_cfg=dict(type='InterpConv'),
|
||||
norm_eval=False),
|
||||
decode_head=dict(
|
||||
type='PSPHead',
|
||||
in_channels=64,
|
||||
in_index=4,
|
||||
channels=16,
|
||||
pool_scales=(1, 2, 3, 6),
|
||||
dropout_ratio=0.1,
|
||||
num_classes=2,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
auxiliary_head=dict(
|
||||
type='FCNHead',
|
||||
in_channels=128,
|
||||
in_index=3,
|
||||
channels=64,
|
||||
num_convs=1,
|
||||
concat_input=False,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=2,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='slide', crop_size=256, stride=170))
|
||||
137
Seg_All_In_One_MMSeg/configs/_base_/models/san_vit-b16.py
Normal file
137
Seg_All_In_One_MMSeg/configs/_base_/models/san_vit-b16.py
Normal file
@@ -0,0 +1,137 @@
|
||||
# model settings
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[122.7709, 116.7460, 104.0937],
|
||||
std=[68.5005, 66.6322, 70.3232],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255,
|
||||
size_divisor=640,
|
||||
test_cfg=dict(size_divisor=32))
|
||||
|
||||
num_classes = 171
|
||||
model = dict(
|
||||
type='MultimodalEncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
pretrained='pretrain/clip_vit_base_patch16_224.pth',
|
||||
asymetric_input=True,
|
||||
encoder_resolution=0.5,
|
||||
image_encoder=dict(
|
||||
type='VisionTransformer',
|
||||
img_size=(224, 224),
|
||||
patch_size=16,
|
||||
patch_pad=0,
|
||||
in_channels=3,
|
||||
embed_dims=768,
|
||||
num_layers=9,
|
||||
num_heads=12,
|
||||
mlp_ratio=4,
|
||||
out_origin=True,
|
||||
out_indices=(2, 5, 8),
|
||||
qkv_bias=True,
|
||||
drop_rate=0.0,
|
||||
attn_drop_rate=0.0,
|
||||
drop_path_rate=0.0,
|
||||
with_cls_token=True,
|
||||
output_cls_token=True,
|
||||
patch_bias=False,
|
||||
pre_norm=True,
|
||||
norm_cfg=dict(type='LN', eps=1e-5),
|
||||
act_cfg=dict(type='QuickGELU'),
|
||||
norm_eval=False,
|
||||
interpolate_mode='bicubic',
|
||||
frozen_exclude=['pos_embed']),
|
||||
text_encoder=dict(
|
||||
type='CLIPTextEncoder',
|
||||
dataset_name=None,
|
||||
templates='vild',
|
||||
embed_dims=512,
|
||||
num_layers=12,
|
||||
num_heads=8,
|
||||
mlp_ratio=4,
|
||||
output_dims=512,
|
||||
cache_feature=True,
|
||||
cat_bg=True,
|
||||
norm_cfg=dict(type='LN', eps=1e-5)
|
||||
),
|
||||
decode_head=dict(
|
||||
type='SideAdapterCLIPHead',
|
||||
num_classes=num_classes,
|
||||
deep_supervision_idxs=[7],
|
||||
san_cfg=dict(
|
||||
in_channels=3,
|
||||
clip_channels=768,
|
||||
embed_dims=240,
|
||||
patch_size=16,
|
||||
patch_bias=True,
|
||||
num_queries=100,
|
||||
cfg_encoder=dict(
|
||||
num_encode_layer=8,
|
||||
num_heads=6,
|
||||
mlp_ratio=4
|
||||
),
|
||||
fusion_index=[0, 1, 2, 3],
|
||||
cfg_decoder=dict(
|
||||
num_heads=12,
|
||||
num_layers=1,
|
||||
embed_channels=256,
|
||||
mlp_channels=256,
|
||||
num_mlp=3,
|
||||
rescale=True),
|
||||
norm_cfg=dict(type='LN', eps=1e-6),
|
||||
),
|
||||
maskgen_cfg=dict(
|
||||
sos_token_format='cls_token',
|
||||
sos_token_num=100,
|
||||
cross_attn=False,
|
||||
num_layers=3,
|
||||
embed_dims=768,
|
||||
num_heads=12,
|
||||
mlp_ratio=4,
|
||||
qkv_bias=True,
|
||||
out_dims=512,
|
||||
final_norm=True,
|
||||
act_cfg=dict(type='QuickGELU'),
|
||||
norm_cfg=dict(type='LN', eps=1e-5),
|
||||
frozen_exclude=[]
|
||||
),
|
||||
align_corners=False,
|
||||
train_cfg=dict(
|
||||
num_points=12544,
|
||||
oversample_ratio=3.0,
|
||||
importance_sample_ratio=0.75,
|
||||
assigner=dict(
|
||||
type='HungarianAssigner',
|
||||
match_costs=[
|
||||
dict(type='ClassificationCost', weight=2.0),
|
||||
dict(
|
||||
type='CrossEntropyLossCost',
|
||||
weight=5.0,
|
||||
use_sigmoid=True),
|
||||
dict(
|
||||
type='DiceCost',
|
||||
weight=5.0,
|
||||
pred_act=True,
|
||||
eps=1.0)
|
||||
])),
|
||||
loss_decode=[dict(type='CrossEntropyLoss',
|
||||
loss_name='loss_cls_ce',
|
||||
loss_weight=2.0,
|
||||
class_weight=[1.0] * num_classes + [0.1]),
|
||||
dict(type='CrossEntropyLoss',
|
||||
use_sigmoid=True,
|
||||
loss_name='loss_mask_ce',
|
||||
loss_weight=5.0),
|
||||
dict(type='DiceLoss',
|
||||
ignore_index=None,
|
||||
naive_dice=True,
|
||||
eps=1,
|
||||
loss_name='loss_mask_dice',
|
||||
loss_weight=5.0)
|
||||
]),
|
||||
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole')) # yapf: disable
|
||||
@@ -0,0 +1,42 @@
|
||||
# model settings
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
pretrained=None,
|
||||
backbone=dict(
|
||||
type='MixVisionTransformer',
|
||||
in_channels=3,
|
||||
embed_dims=32,
|
||||
num_stages=4,
|
||||
num_layers=[2, 2, 2, 2],
|
||||
num_heads=[1, 2, 5, 8],
|
||||
patch_sizes=[7, 3, 3, 3],
|
||||
sr_ratios=[8, 4, 2, 1],
|
||||
out_indices=(0, 1, 2, 3),
|
||||
mlp_ratio=4,
|
||||
qkv_bias=True,
|
||||
drop_rate=0.0,
|
||||
attn_drop_rate=0.0,
|
||||
drop_path_rate=0.1),
|
||||
decode_head=dict(
|
||||
type='SegformerHead',
|
||||
in_channels=[32, 64, 160, 256],
|
||||
in_index=[0, 1, 2, 3],
|
||||
channels=256,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
@@ -0,0 +1,44 @@
|
||||
checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segmenter/vit_base_p16_384_20220308-96dfe169.pth' # noqa
|
||||
# model settings
|
||||
backbone_norm_cfg = dict(type='LN', eps=1e-6, requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[127.5, 127.5, 127.5],
|
||||
std=[127.5, 127.5, 127.5],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
pretrained=checkpoint,
|
||||
backbone=dict(
|
||||
type='VisionTransformer',
|
||||
img_size=(512, 512),
|
||||
patch_size=16,
|
||||
in_channels=3,
|
||||
embed_dims=768,
|
||||
num_layers=12,
|
||||
num_heads=12,
|
||||
drop_path_rate=0.1,
|
||||
attn_drop_rate=0.0,
|
||||
drop_rate=0.0,
|
||||
final_norm=True,
|
||||
norm_cfg=backbone_norm_cfg,
|
||||
with_cls_token=True,
|
||||
interpolate_mode='bicubic',
|
||||
),
|
||||
decode_head=dict(
|
||||
type='SegmenterMaskTransformerHead',
|
||||
in_channels=768,
|
||||
channels=768,
|
||||
num_classes=150,
|
||||
num_layers=2,
|
||||
num_heads=12,
|
||||
embed_dims=768,
|
||||
dropout_ratio=0.0,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
|
||||
),
|
||||
test_cfg=dict(mode='slide', crop_size=(512, 512), stride=(480, 480)),
|
||||
)
|
||||
103
Seg_All_In_One_MMSeg/configs/_base_/models/setr_mla.py
Normal file
103
Seg_All_In_One_MMSeg/configs/_base_/models/setr_mla.py
Normal file
@@ -0,0 +1,103 @@
|
||||
# model settings
|
||||
backbone_norm_cfg = dict(type='LN', eps=1e-6, requires_grad=True)
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
pretrained='pretrain/jx_vit_large_p16_384-b3be5167.pth',
|
||||
backbone=dict(
|
||||
type='VisionTransformer',
|
||||
img_size=(768, 768),
|
||||
patch_size=16,
|
||||
in_channels=3,
|
||||
embed_dims=1024,
|
||||
num_layers=24,
|
||||
num_heads=16,
|
||||
out_indices=(5, 11, 17, 23),
|
||||
drop_rate=0.1,
|
||||
norm_cfg=backbone_norm_cfg,
|
||||
with_cls_token=False,
|
||||
interpolate_mode='bilinear',
|
||||
),
|
||||
neck=dict(
|
||||
type='MLANeck',
|
||||
in_channels=[1024, 1024, 1024, 1024],
|
||||
out_channels=256,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='ReLU'),
|
||||
),
|
||||
decode_head=dict(
|
||||
type='SETRMLAHead',
|
||||
in_channels=(256, 256, 256, 256),
|
||||
channels=512,
|
||||
in_index=(0, 1, 2, 3),
|
||||
dropout_ratio=0,
|
||||
mla_channels=128,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
auxiliary_head=[
|
||||
dict(
|
||||
type='FCNHead',
|
||||
in_channels=256,
|
||||
channels=256,
|
||||
in_index=0,
|
||||
dropout_ratio=0,
|
||||
num_convs=0,
|
||||
kernel_size=1,
|
||||
concat_input=False,
|
||||
num_classes=19,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||
dict(
|
||||
type='FCNHead',
|
||||
in_channels=256,
|
||||
channels=256,
|
||||
in_index=1,
|
||||
dropout_ratio=0,
|
||||
num_convs=0,
|
||||
kernel_size=1,
|
||||
concat_input=False,
|
||||
num_classes=19,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||
dict(
|
||||
type='FCNHead',
|
||||
in_channels=256,
|
||||
channels=256,
|
||||
in_index=2,
|
||||
dropout_ratio=0,
|
||||
num_convs=0,
|
||||
kernel_size=1,
|
||||
concat_input=False,
|
||||
num_classes=19,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||
dict(
|
||||
type='FCNHead',
|
||||
in_channels=256,
|
||||
channels=256,
|
||||
in_index=3,
|
||||
dropout_ratio=0,
|
||||
num_convs=0,
|
||||
kernel_size=1,
|
||||
concat_input=False,
|
||||
num_classes=19,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||
],
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
88
Seg_All_In_One_MMSeg/configs/_base_/models/setr_naive.py
Normal file
88
Seg_All_In_One_MMSeg/configs/_base_/models/setr_naive.py
Normal file
@@ -0,0 +1,88 @@
|
||||
# model settings
|
||||
backbone_norm_cfg = dict(type='LN', eps=1e-6, requires_grad=True)
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
pretrained='pretrain/jx_vit_large_p16_384-b3be5167.pth',
|
||||
backbone=dict(
|
||||
type='VisionTransformer',
|
||||
img_size=(768, 768),
|
||||
patch_size=16,
|
||||
in_channels=3,
|
||||
embed_dims=1024,
|
||||
num_layers=24,
|
||||
num_heads=16,
|
||||
out_indices=(9, 14, 19, 23),
|
||||
drop_rate=0.1,
|
||||
norm_cfg=backbone_norm_cfg,
|
||||
with_cls_token=True,
|
||||
interpolate_mode='bilinear',
|
||||
),
|
||||
decode_head=dict(
|
||||
type='SETRUPHead',
|
||||
in_channels=1024,
|
||||
channels=256,
|
||||
in_index=3,
|
||||
num_classes=19,
|
||||
dropout_ratio=0,
|
||||
norm_cfg=norm_cfg,
|
||||
num_convs=1,
|
||||
up_scale=4,
|
||||
kernel_size=1,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
auxiliary_head=[
|
||||
dict(
|
||||
type='SETRUPHead',
|
||||
in_channels=1024,
|
||||
channels=256,
|
||||
in_index=0,
|
||||
num_classes=19,
|
||||
dropout_ratio=0,
|
||||
norm_cfg=norm_cfg,
|
||||
num_convs=1,
|
||||
up_scale=4,
|
||||
kernel_size=1,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||
dict(
|
||||
type='SETRUPHead',
|
||||
in_channels=1024,
|
||||
channels=256,
|
||||
in_index=1,
|
||||
num_classes=19,
|
||||
dropout_ratio=0,
|
||||
norm_cfg=norm_cfg,
|
||||
num_convs=1,
|
||||
up_scale=4,
|
||||
kernel_size=1,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||
dict(
|
||||
type='SETRUPHead',
|
||||
in_channels=1024,
|
||||
channels=256,
|
||||
in_index=2,
|
||||
num_classes=19,
|
||||
dropout_ratio=0,
|
||||
norm_cfg=norm_cfg,
|
||||
num_convs=1,
|
||||
up_scale=4,
|
||||
kernel_size=1,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4))
|
||||
],
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
88
Seg_All_In_One_MMSeg/configs/_base_/models/setr_pup.py
Normal file
88
Seg_All_In_One_MMSeg/configs/_base_/models/setr_pup.py
Normal file
@@ -0,0 +1,88 @@
|
||||
# model settings
|
||||
backbone_norm_cfg = dict(type='LN', eps=1e-6, requires_grad=True)
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
pretrained='pretrain/jx_vit_large_p16_384-b3be5167.pth',
|
||||
backbone=dict(
|
||||
type='VisionTransformer',
|
||||
img_size=(768, 768),
|
||||
patch_size=16,
|
||||
in_channels=3,
|
||||
embed_dims=1024,
|
||||
num_layers=24,
|
||||
num_heads=16,
|
||||
out_indices=(9, 14, 19, 23),
|
||||
drop_rate=0.1,
|
||||
norm_cfg=backbone_norm_cfg,
|
||||
with_cls_token=True,
|
||||
interpolate_mode='bilinear',
|
||||
),
|
||||
decode_head=dict(
|
||||
type='SETRUPHead',
|
||||
in_channels=1024,
|
||||
channels=256,
|
||||
in_index=3,
|
||||
num_classes=19,
|
||||
dropout_ratio=0,
|
||||
norm_cfg=norm_cfg,
|
||||
num_convs=4,
|
||||
up_scale=2,
|
||||
kernel_size=3,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
auxiliary_head=[
|
||||
dict(
|
||||
type='SETRUPHead',
|
||||
in_channels=1024,
|
||||
channels=256,
|
||||
in_index=0,
|
||||
num_classes=19,
|
||||
dropout_ratio=0,
|
||||
norm_cfg=norm_cfg,
|
||||
num_convs=1,
|
||||
up_scale=4,
|
||||
kernel_size=3,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||
dict(
|
||||
type='SETRUPHead',
|
||||
in_channels=1024,
|
||||
channels=256,
|
||||
in_index=1,
|
||||
num_classes=19,
|
||||
dropout_ratio=0,
|
||||
norm_cfg=norm_cfg,
|
||||
num_convs=1,
|
||||
up_scale=4,
|
||||
kernel_size=3,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||
dict(
|
||||
type='SETRUPHead',
|
||||
in_channels=1024,
|
||||
channels=256,
|
||||
in_index=2,
|
||||
num_classes=19,
|
||||
dropout_ratio=0,
|
||||
norm_cfg=norm_cfg,
|
||||
num_convs=1,
|
||||
up_scale=4,
|
||||
kernel_size=3,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||
],
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
91
Seg_All_In_One_MMSeg/configs/_base_/models/stdc.py
Normal file
91
Seg_All_In_One_MMSeg/configs/_base_/models/stdc.py
Normal file
@@ -0,0 +1,91 @@
|
||||
norm_cfg = dict(type='BN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
pretrained=None,
|
||||
backbone=dict(
|
||||
type='STDCContextPathNet',
|
||||
backbone_cfg=dict(
|
||||
type='STDCNet',
|
||||
stdc_type='STDCNet1',
|
||||
in_channels=3,
|
||||
channels=(32, 64, 256, 512, 1024),
|
||||
bottleneck_type='cat',
|
||||
num_convs=4,
|
||||
norm_cfg=norm_cfg,
|
||||
act_cfg=dict(type='ReLU'),
|
||||
with_final_conv=False),
|
||||
last_in_channels=(1024, 512),
|
||||
out_channels=128,
|
||||
ffm_cfg=dict(in_channels=384, out_channels=256, scale_factor=4)),
|
||||
decode_head=dict(
|
||||
type='FCNHead',
|
||||
in_channels=256,
|
||||
channels=256,
|
||||
num_convs=1,
|
||||
num_classes=19,
|
||||
in_index=3,
|
||||
concat_input=False,
|
||||
dropout_ratio=0.1,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=True,
|
||||
sampler=dict(type='OHEMPixelSampler', thresh=0.7, min_kept=10000),
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
auxiliary_head=[
|
||||
dict(
|
||||
type='FCNHead',
|
||||
in_channels=128,
|
||||
channels=64,
|
||||
num_convs=1,
|
||||
num_classes=19,
|
||||
in_index=2,
|
||||
norm_cfg=norm_cfg,
|
||||
concat_input=False,
|
||||
align_corners=False,
|
||||
sampler=dict(type='OHEMPixelSampler', thresh=0.7, min_kept=10000),
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
dict(
|
||||
type='FCNHead',
|
||||
in_channels=128,
|
||||
channels=64,
|
||||
num_convs=1,
|
||||
num_classes=19,
|
||||
in_index=1,
|
||||
norm_cfg=norm_cfg,
|
||||
concat_input=False,
|
||||
align_corners=False,
|
||||
sampler=dict(type='OHEMPixelSampler', thresh=0.7, min_kept=10000),
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
dict(
|
||||
type='STDCHead',
|
||||
in_channels=256,
|
||||
channels=64,
|
||||
num_convs=1,
|
||||
num_classes=2,
|
||||
boundary_threshold=0.1,
|
||||
in_index=0,
|
||||
norm_cfg=norm_cfg,
|
||||
concat_input=False,
|
||||
align_corners=True,
|
||||
loss_decode=[
|
||||
dict(
|
||||
type='CrossEntropyLoss',
|
||||
loss_name='loss_ce',
|
||||
use_sigmoid=True,
|
||||
loss_weight=1.0),
|
||||
dict(type='DiceLoss', loss_name='loss_dice', loss_weight=1.0)
|
||||
]),
|
||||
],
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
@@ -0,0 +1,53 @@
|
||||
checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/pcpvt_small_20220308-e638c41c.pth' # noqa
|
||||
|
||||
# model settings
|
||||
backbone_norm_cfg = dict(type='LN')
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
backbone=dict(
|
||||
type='PCPVT',
|
||||
init_cfg=dict(type='Pretrained', checkpoint=checkpoint),
|
||||
in_channels=3,
|
||||
embed_dims=[64, 128, 320, 512],
|
||||
num_heads=[1, 2, 5, 8],
|
||||
patch_sizes=[4, 2, 2, 2],
|
||||
strides=[4, 2, 2, 2],
|
||||
mlp_ratios=[8, 8, 4, 4],
|
||||
out_indices=(0, 1, 2, 3),
|
||||
qkv_bias=True,
|
||||
norm_cfg=backbone_norm_cfg,
|
||||
depths=[3, 4, 6, 3],
|
||||
sr_ratios=[8, 4, 2, 1],
|
||||
norm_after_stage=False,
|
||||
drop_rate=0.0,
|
||||
attn_drop_rate=0.,
|
||||
drop_path_rate=0.2),
|
||||
neck=dict(
|
||||
type='FPN',
|
||||
in_channels=[64, 128, 320, 512],
|
||||
out_channels=256,
|
||||
num_outs=4),
|
||||
decode_head=dict(
|
||||
type='FPNHead',
|
||||
in_channels=[256, 256, 256, 256],
|
||||
in_index=[0, 1, 2, 3],
|
||||
feature_strides=[4, 8, 16, 32],
|
||||
channels=128,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=150,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
@@ -0,0 +1,61 @@
|
||||
checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/pcpvt_small_20220308-e638c41c.pth' # noqa
|
||||
|
||||
# model settings
|
||||
backbone_norm_cfg = dict(type='LN')
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
backbone=dict(
|
||||
type='PCPVT',
|
||||
init_cfg=dict(type='Pretrained', checkpoint=checkpoint),
|
||||
in_channels=3,
|
||||
embed_dims=[64, 128, 320, 512],
|
||||
num_heads=[1, 2, 5, 8],
|
||||
patch_sizes=[4, 2, 2, 2],
|
||||
strides=[4, 2, 2, 2],
|
||||
mlp_ratios=[8, 8, 4, 4],
|
||||
out_indices=(0, 1, 2, 3),
|
||||
qkv_bias=True,
|
||||
norm_cfg=backbone_norm_cfg,
|
||||
depths=[3, 4, 6, 3],
|
||||
sr_ratios=[8, 4, 2, 1],
|
||||
norm_after_stage=False,
|
||||
drop_rate=0.0,
|
||||
attn_drop_rate=0.,
|
||||
drop_path_rate=0.2),
|
||||
decode_head=dict(
|
||||
type='UPerHead',
|
||||
in_channels=[64, 128, 320, 512],
|
||||
in_index=[0, 1, 2, 3],
|
||||
pool_scales=(1, 2, 3, 6),
|
||||
channels=512,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=150,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
auxiliary_head=dict(
|
||||
type='FCNHead',
|
||||
in_channels=320,
|
||||
in_index=2,
|
||||
channels=256,
|
||||
num_convs=1,
|
||||
concat_input=False,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=150,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
58
Seg_All_In_One_MMSeg/configs/_base_/models/upernet_beit.py
Normal file
58
Seg_All_In_One_MMSeg/configs/_base_/models/upernet_beit.py
Normal file
@@ -0,0 +1,58 @@
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
pretrained=None,
|
||||
backbone=dict(
|
||||
type='BEiT',
|
||||
img_size=(640, 640),
|
||||
patch_size=16,
|
||||
in_channels=3,
|
||||
embed_dims=768,
|
||||
num_layers=12,
|
||||
num_heads=12,
|
||||
mlp_ratio=4,
|
||||
out_indices=(3, 5, 7, 11),
|
||||
qv_bias=True,
|
||||
attn_drop_rate=0.0,
|
||||
drop_path_rate=0.1,
|
||||
norm_cfg=dict(type='LN', eps=1e-6),
|
||||
act_cfg=dict(type='GELU'),
|
||||
norm_eval=False,
|
||||
init_values=0.1),
|
||||
neck=dict(type='Feature2Pyramid', embed_dim=768, rescales=[4, 2, 1, 0.5]),
|
||||
decode_head=dict(
|
||||
type='UPerHead',
|
||||
in_channels=[768, 768, 768, 768],
|
||||
in_index=[0, 1, 2, 3],
|
||||
pool_scales=(1, 2, 3, 6),
|
||||
channels=768,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=150,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
auxiliary_head=dict(
|
||||
type='FCNHead',
|
||||
in_channels=768,
|
||||
in_index=2,
|
||||
channels=256,
|
||||
num_convs=1,
|
||||
concat_input=False,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=150,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
@@ -0,0 +1,52 @@
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
custom_imports = dict(imports='mmpretrain.models', allow_failed_imports=False)
|
||||
checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-base_3rdparty_32xb128-noema_in1k_20220301-2a0ee547.pth' # noqa
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
pretrained=None,
|
||||
backbone=dict(
|
||||
type='mmpretrain.ConvNeXt',
|
||||
arch='base',
|
||||
out_indices=[0, 1, 2, 3],
|
||||
drop_path_rate=0.4,
|
||||
layer_scale_init_value=1.0,
|
||||
gap_before_final_norm=False,
|
||||
init_cfg=dict(
|
||||
type='Pretrained', checkpoint=checkpoint_file,
|
||||
prefix='backbone.')),
|
||||
decode_head=dict(
|
||||
type='UPerHead',
|
||||
in_channels=[128, 256, 512, 1024],
|
||||
in_index=[0, 1, 2, 3],
|
||||
pool_scales=(1, 2, 3, 6),
|
||||
channels=512,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
auxiliary_head=dict(
|
||||
type='FCNHead',
|
||||
in_channels=384,
|
||||
in_index=2,
|
||||
channels=256,
|
||||
num_convs=1,
|
||||
concat_input=False,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
57
Seg_All_In_One_MMSeg/configs/_base_/models/upernet_mae.py
Normal file
57
Seg_All_In_One_MMSeg/configs/_base_/models/upernet_mae.py
Normal file
@@ -0,0 +1,57 @@
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
pretrained=None,
|
||||
backbone=dict(
|
||||
type='MAE',
|
||||
img_size=(640, 640),
|
||||
patch_size=16,
|
||||
in_channels=3,
|
||||
embed_dims=768,
|
||||
num_layers=12,
|
||||
num_heads=12,
|
||||
mlp_ratio=4,
|
||||
out_indices=(3, 5, 7, 11),
|
||||
attn_drop_rate=0.0,
|
||||
drop_path_rate=0.1,
|
||||
norm_cfg=dict(type='LN', eps=1e-6),
|
||||
act_cfg=dict(type='GELU'),
|
||||
norm_eval=False,
|
||||
init_values=0.1),
|
||||
neck=dict(type='Feature2Pyramid', embed_dim=768, rescales=[4, 2, 1, 0.5]),
|
||||
decode_head=dict(
|
||||
type='UPerHead',
|
||||
in_channels=[384, 384, 384, 384],
|
||||
in_index=[0, 1, 2, 3],
|
||||
pool_scales=(1, 2, 3, 6),
|
||||
channels=512,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
auxiliary_head=dict(
|
||||
type='FCNHead',
|
||||
in_channels=384,
|
||||
in_index=2,
|
||||
channels=256,
|
||||
num_convs=1,
|
||||
concat_input=False,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
52
Seg_All_In_One_MMSeg/configs/_base_/models/upernet_r50.py
Normal file
52
Seg_All_In_One_MMSeg/configs/_base_/models/upernet_r50.py
Normal file
@@ -0,0 +1,52 @@
|
||||
# model settings
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
pretrained='open-mmlab://resnet50_v1c',
|
||||
backbone=dict(
|
||||
type='ResNetV1c',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
dilations=(1, 1, 1, 1),
|
||||
strides=(1, 2, 2, 2),
|
||||
norm_cfg=norm_cfg,
|
||||
norm_eval=False,
|
||||
style='pytorch',
|
||||
contract_dilation=True),
|
||||
decode_head=dict(
|
||||
type='UPerHead',
|
||||
in_channels=[256, 512, 1024, 2048],
|
||||
in_index=[0, 1, 2, 3],
|
||||
pool_scales=(1, 2, 3, 6),
|
||||
channels=512,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
auxiliary_head=dict(
|
||||
type='FCNHead',
|
||||
in_channels=1024,
|
||||
in_index=2,
|
||||
channels=256,
|
||||
num_convs=1,
|
||||
concat_input=False,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
62
Seg_All_In_One_MMSeg/configs/_base_/models/upernet_swin.py
Normal file
62
Seg_All_In_One_MMSeg/configs/_base_/models/upernet_swin.py
Normal file
@@ -0,0 +1,62 @@
|
||||
# model settings
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
backbone_norm_cfg = dict(type='LN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
pretrained=None,
|
||||
backbone=dict(
|
||||
type='SwinTransformer',
|
||||
pretrain_img_size=224,
|
||||
embed_dims=96,
|
||||
patch_size=4,
|
||||
window_size=7,
|
||||
mlp_ratio=4,
|
||||
depths=[2, 2, 6, 2],
|
||||
num_heads=[3, 6, 12, 24],
|
||||
strides=(4, 2, 2, 2),
|
||||
out_indices=(0, 1, 2, 3),
|
||||
qkv_bias=True,
|
||||
qk_scale=None,
|
||||
patch_norm=True,
|
||||
drop_rate=0.,
|
||||
attn_drop_rate=0.,
|
||||
drop_path_rate=0.3,
|
||||
use_abs_pos_embed=False,
|
||||
act_cfg=dict(type='GELU'),
|
||||
norm_cfg=backbone_norm_cfg),
|
||||
decode_head=dict(
|
||||
type='UPerHead',
|
||||
in_channels=[96, 192, 384, 768],
|
||||
in_index=[0, 1, 2, 3],
|
||||
pool_scales=(1, 2, 3, 6),
|
||||
channels=512,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
auxiliary_head=dict(
|
||||
type='FCNHead',
|
||||
in_channels=384,
|
||||
in_index=2,
|
||||
channels=256,
|
||||
num_convs=1,
|
||||
concat_input=False,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole'))
|
||||
@@ -0,0 +1,65 @@
|
||||
# model settings
|
||||
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=255)
|
||||
model = dict(
|
||||
type='EncoderDecoder',
|
||||
data_preprocessor=data_preprocessor,
|
||||
pretrained='pretrain/jx_vit_base_p16_224-80ecf9dd.pth',
|
||||
backbone=dict(
|
||||
type='VisionTransformer',
|
||||
img_size=(512, 512),
|
||||
patch_size=16,
|
||||
in_channels=3,
|
||||
embed_dims=768,
|
||||
num_layers=12,
|
||||
num_heads=12,
|
||||
mlp_ratio=4,
|
||||
out_indices=(2, 5, 8, 11),
|
||||
qkv_bias=True,
|
||||
drop_rate=0.0,
|
||||
attn_drop_rate=0.0,
|
||||
drop_path_rate=0.0,
|
||||
with_cls_token=True,
|
||||
norm_cfg=dict(type='LN', eps=1e-6),
|
||||
act_cfg=dict(type='GELU'),
|
||||
norm_eval=False,
|
||||
interpolate_mode='bicubic'),
|
||||
neck=dict(
|
||||
type='MultiLevelNeck',
|
||||
in_channels=[768, 768, 768, 768],
|
||||
out_channels=768,
|
||||
scales=[4, 2, 1, 0.5]),
|
||||
decode_head=dict(
|
||||
type='UPerHead',
|
||||
in_channels=[768, 768, 768, 768],
|
||||
in_index=[0, 1, 2, 3],
|
||||
pool_scales=(1, 2, 3, 6),
|
||||
channels=512,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
||||
auxiliary_head=dict(
|
||||
type='FCNHead',
|
||||
in_channels=768,
|
||||
in_index=3,
|
||||
channels=256,
|
||||
num_convs=1,
|
||||
concat_input=False,
|
||||
dropout_ratio=0.1,
|
||||
num_classes=19,
|
||||
norm_cfg=norm_cfg,
|
||||
align_corners=False,
|
||||
loss_decode=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
||||
# model training and testing settings
|
||||
train_cfg=dict(),
|
||||
test_cfg=dict(mode='whole')) # yapf: disable
|
||||
86
Seg_All_In_One_MMSeg/configs/_base_/models/vpd_sd.py
Normal file
86
Seg_All_In_One_MMSeg/configs/_base_/models/vpd_sd.py
Normal file
@@ -0,0 +1,86 @@
|
||||
# model settings
|
||||
data_preprocessor = dict(
|
||||
type='SegDataPreProcessor',
|
||||
mean=[127.5, 127.5, 127.5],
|
||||
std=[127.5, 127.5, 127.5],
|
||||
bgr_to_rgb=True,
|
||||
pad_val=0,
|
||||
seg_pad_val=0)
|
||||
|
||||
# adapted from stable-diffusion/configs/stable-diffusion/v1-inference.yaml
|
||||
stable_diffusion_cfg = dict(
|
||||
base_learning_rate=0.0001,
|
||||
target='ldm.models.diffusion.ddpm.LatentDiffusion',
|
||||
checkpoint='https://download.openmmlab.com/mmsegmentation/v0.5/'
|
||||
'vpd/stable_diffusion_v1-5_pretrain_third_party.pth',
|
||||
params=dict(
|
||||
linear_start=0.00085,
|
||||
linear_end=0.012,
|
||||
num_timesteps_cond=1,
|
||||
log_every_t=200,
|
||||
timesteps=1000,
|
||||
first_stage_key='jpg',
|
||||
cond_stage_key='txt',
|
||||
image_size=64,
|
||||
channels=4,
|
||||
cond_stage_trainable=False,
|
||||
conditioning_key='crossattn',
|
||||
monitor='val/loss_simple_ema',
|
||||
scale_factor=0.18215,
|
||||
use_ema=False,
|
||||
scheduler_config=dict(
|
||||
target='ldm.lr_scheduler.LambdaLinearScheduler',
|
||||
params=dict(
|
||||
warm_up_steps=[10000],
|
||||
cycle_lengths=[10000000000000],
|
||||
f_start=[1e-06],
|
||||
f_max=[1.0],
|
||||
f_min=[1.0])),
|
||||
unet_config=dict(
|
||||
target='ldm.modules.diffusionmodules.openaimodel.UNetModel',
|
||||
params=dict(
|
||||
image_size=32,
|
||||
in_channels=4,
|
||||
out_channels=4,
|
||||
model_channels=320,
|
||||
attention_resolutions=[4, 2, 1],
|
||||
num_res_blocks=2,
|
||||
channel_mult=[1, 2, 4, 4],
|
||||
num_heads=8,
|
||||
use_spatial_transformer=True,
|
||||
transformer_depth=1,
|
||||
context_dim=768,
|
||||
use_checkpoint=True,
|
||||
legacy=False)),
|
||||
first_stage_config=dict(
|
||||
target='ldm.models.autoencoder.AutoencoderKL',
|
||||
params=dict(
|
||||
embed_dim=4,
|
||||
monitor='val/rec_loss',
|
||||
ddconfig=dict(
|
||||
double_z=True,
|
||||
z_channels=4,
|
||||
resolution=256,
|
||||
in_channels=3,
|
||||
out_ch=3,
|
||||
ch=128,
|
||||
ch_mult=[1, 2, 4, 4],
|
||||
num_res_blocks=2,
|
||||
attn_resolutions=[],
|
||||
dropout=0.0),
|
||||
lossconfig=dict(target='torch.nn.Identity'))),
|
||||
cond_stage_config=dict(
|
||||
target='ldm.modules.encoders.modules.AbstractEncoder')))
|
||||
|
||||
model = dict(
|
||||
type='DepthEstimator',
|
||||
data_preprocessor=data_preprocessor,
|
||||
backbone=dict(
|
||||
type='VPD',
|
||||
diffusion_cfg=stable_diffusion_cfg,
|
||||
),
|
||||
)
|
||||
|
||||
# some of the parameters in stable-diffusion model will not be updated
|
||||
# during training
|
||||
find_unused_parameters = True
|
||||
@@ -0,0 +1,25 @@
|
||||
# optimizer
|
||||
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
|
||||
optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None)
|
||||
# learning policy
|
||||
param_scheduler = [
|
||||
dict(
|
||||
type='PolyLR',
|
||||
eta_min=1e-4,
|
||||
power=0.9,
|
||||
begin=0,
|
||||
end=160000,
|
||||
by_epoch=False)
|
||||
]
|
||||
# training schedule for 160k
|
||||
train_cfg = dict(
|
||||
type='IterBasedTrainLoop', max_iters=160000, val_interval=16000)
|
||||
val_cfg = dict(type='ValLoop')
|
||||
test_cfg = dict(type='TestLoop')
|
||||
default_hooks = dict(
|
||||
timer=dict(type='IterTimerHook'),
|
||||
logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False),
|
||||
param_scheduler=dict(type='ParamSchedulerHook'),
|
||||
checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=16000),
|
||||
sampler_seed=dict(type='DistSamplerSeedHook'),
|
||||
visualization=dict(type='SegVisualizationHook'))
|
||||
@@ -0,0 +1,25 @@
|
||||
# optimizer
|
||||
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
|
||||
optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None)
|
||||
# learning policy
|
||||
param_scheduler = [
|
||||
dict(
|
||||
type='PolyLR',
|
||||
eta_min=1e-4,
|
||||
power=0.9,
|
||||
begin=0,
|
||||
end=160000,
|
||||
by_epoch=False)
|
||||
]
|
||||
# training schedule for 160k
|
||||
train_cfg = dict(
|
||||
type='IterBasedTrainLoop', max_iters=160000, val_interval=10000)
|
||||
val_cfg = dict(type='ValLoop')
|
||||
test_cfg = dict(type='TestLoop')
|
||||
default_hooks = dict(
|
||||
timer=dict(type='IterTimerHook'),
|
||||
logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False),
|
||||
param_scheduler=dict(type='ParamSchedulerHook'),
|
||||
checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=10000),
|
||||
sampler_seed=dict(type='DistSamplerSeedHook'),
|
||||
visualization=dict(type='SegVisualizationHook'))
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user