Initial media depth project backup
This commit is contained in:
@@ -0,0 +1,23 @@
|
||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the license found in the
|
||||
# LICENSE file in the root directory of this source tree.
|
||||
|
||||
import pathlib
|
||||
|
||||
from omegaconf import OmegaConf
|
||||
|
||||
|
||||
def load_config(config_name: str):
    """Load ``<config_name>.yaml`` from the directory containing this module.

    Args:
        config_name: Name of the config file without the ``.yaml`` extension;
            it is joined onto this module's directory, so it may include
            sub-directory components.

    Returns:
        The object returned by ``OmegaConf.load`` for that file.
    """
    # Resolve relative to this file so lookups do not depend on the caller's
    # current working directory.
    config_dir = pathlib.Path(__file__).parent.resolve()
    return OmegaConf.load(config_dir / (config_name + ".yaml"))
|
||||
|
||||
|
||||
# Default config, loaded once at import time from "ssl_default_config.yaml"
# located next to this module; load_and_merge_config() uses it as the base.
dinov2_default_config = load_config("ssl_default_config")
|
||||
|
||||
|
||||
def load_and_merge_config(config_name: str):
    """Load the named config and merge it onto the default config.

    Args:
        config_name: Name of the config file without the ``.yaml`` extension,
            passed straight to ``load_config``.

    Returns:
        The result of ``OmegaConf.merge(defaults, loaded)``, where ``defaults``
        is a new config created from ``dinov2_default_config`` and ``loaded``
        is the config read for ``config_name``.
    """
    # The defaults are passed through OmegaConf.create() first so the merge is
    # given a newly created config rather than the module-level object itself.
    return OmegaConf.merge(
        OmegaConf.create(dinov2_default_config),
        load_config(config_name),
    )
|
||||
@@ -0,0 +1,6 @@
|
||||
student:
|
||||
arch: vit_base
|
||||
patch_size: 14
|
||||
crops:
|
||||
global_crops_size: 518 # this is to set up the position embeddings properly
|
||||
local_crops_size: 98
|
||||
@@ -0,0 +1,7 @@
|
||||
student:
|
||||
arch: vit_giant2
|
||||
patch_size: 14
|
||||
ffn_layer: swiglufused
|
||||
crops:
|
||||
global_crops_size: 518 # this is to set up the position embeddings properly
|
||||
local_crops_size: 98
|
||||
@@ -0,0 +1,6 @@
|
||||
student:
|
||||
arch: vit_large
|
||||
patch_size: 14
|
||||
crops:
|
||||
global_crops_size: 518 # this is to set up the position embeddings properly
|
||||
local_crops_size: 98
|
||||
@@ -0,0 +1,6 @@
|
||||
student:
|
||||
arch: vit_small
|
||||
patch_size: 14
|
||||
crops:
|
||||
global_crops_size: 518 # this is to set up the position embeddings properly
|
||||
local_crops_size: 98
|
||||
@@ -0,0 +1,115 @@
|
||||
MODEL:
|
||||
WEIGHTS: ''
|
||||
compute_precision:
|
||||
grad_scaler: true
|
||||
teacher:
|
||||
backbone:
|
||||
sharding_strategy: SHARD_GRAD_OP
|
||||
mixed_precision:
|
||||
param_dtype: fp16
|
||||
reduce_dtype: fp16
|
||||
buffer_dtype: fp32
|
||||
dino_head:
|
||||
sharding_strategy: SHARD_GRAD_OP
|
||||
mixed_precision:
|
||||
param_dtype: fp16
|
||||
reduce_dtype: fp16
|
||||
buffer_dtype: fp32
|
||||
ibot_head:
|
||||
sharding_strategy: SHARD_GRAD_OP
|
||||
mixed_precision:
|
||||
param_dtype: fp16
|
||||
reduce_dtype: fp16
|
||||
buffer_dtype: fp32
|
||||
student:
|
||||
backbone:
|
||||
sharding_strategy: SHARD_GRAD_OP
|
||||
mixed_precision:
|
||||
param_dtype: fp16
|
||||
reduce_dtype: fp16
|
||||
buffer_dtype: fp32
|
||||
dino_head:
|
||||
sharding_strategy: SHARD_GRAD_OP
|
||||
mixed_precision:
|
||||
param_dtype: fp16
|
||||
reduce_dtype: fp32
|
||||
buffer_dtype: fp32
|
||||
ibot_head:
|
||||
sharding_strategy: SHARD_GRAD_OP
|
||||
mixed_precision:
|
||||
param_dtype: fp16
|
||||
reduce_dtype: fp32
|
||||
buffer_dtype: fp32
|
||||
dino:
|
||||
loss_weight: 1.0
|
||||
head_n_prototypes: 65536
|
||||
head_bottleneck_dim: 256
|
||||
head_nlayers: 3
|
||||
head_hidden_dim: 2048
|
||||
koleo_loss_weight: 0.1
|
||||
ibot:
|
||||
loss_weight: 1.0
|
||||
mask_sample_probability: 0.5
|
||||
mask_ratio_min_max:
|
||||
- 0.1
|
||||
- 0.5
|
||||
separate_head: false
|
||||
head_n_prototypes: 65536
|
||||
head_bottleneck_dim: 256
|
||||
head_nlayers: 3
|
||||
head_hidden_dim: 2048
|
||||
train:
|
||||
batch_size_per_gpu: 64
|
||||
dataset_path: ImageNet:split=TRAIN
|
||||
output_dir: .
|
||||
saveckp_freq: 20
|
||||
seed: 0
|
||||
num_workers: 10
|
||||
OFFICIAL_EPOCH_LENGTH: 1250
|
||||
cache_dataset: true
|
||||
centering: "centering" # or "sinkhorn_knopp"
|
||||
student:
|
||||
arch: vit_large
|
||||
patch_size: 16
|
||||
drop_path_rate: 0.3
|
||||
layerscale: 1.0e-05
|
||||
drop_path_uniform: true
|
||||
pretrained_weights: ''
|
||||
ffn_layer: "mlp"
|
||||
block_chunks: 0
|
||||
qkv_bias: true
|
||||
proj_bias: true
|
||||
ffn_bias: true
|
||||
teacher:
|
||||
momentum_teacher: 0.992
|
||||
final_momentum_teacher: 1
|
||||
warmup_teacher_temp: 0.04
|
||||
teacher_temp: 0.07
|
||||
warmup_teacher_temp_epochs: 30
|
||||
optim:
|
||||
epochs: 100
|
||||
weight_decay: 0.04
|
||||
weight_decay_end: 0.4
|
||||
base_lr: 0.004 # learning rate for a batch size of 1024
|
||||
lr: 0. # will be set after applying scaling rule
|
||||
warmup_epochs: 10
|
||||
min_lr: 1.0e-06
|
||||
clip_grad: 3.0
|
||||
freeze_last_layer_epochs: 1
|
||||
scaling_rule: sqrt_wrt_1024
|
||||
patch_embed_lr_mult: 0.2
|
||||
layerwise_decay: 0.9
|
||||
adamw_beta1: 0.9
|
||||
adamw_beta2: 0.999
|
||||
crops:
|
||||
global_crops_scale:
|
||||
- 0.32
|
||||
- 1.0
|
||||
local_crops_number: 8
|
||||
local_crops_scale:
|
||||
- 0.05
|
||||
- 0.32
|
||||
global_crops_size: 224
|
||||
local_crops_size: 96
|
||||
evaluation:
|
||||
eval_period_iterations: 12500
|
||||
@@ -0,0 +1,26 @@
|
||||
dino:
|
||||
head_n_prototypes: 131072
|
||||
head_bottleneck_dim: 384
|
||||
ibot:
|
||||
separate_head: true
|
||||
head_n_prototypes: 131072
|
||||
train:
|
||||
batch_size_per_gpu: 12
|
||||
dataset_path: ImageNet22k
|
||||
centering: sinkhorn_knopp
|
||||
student:
|
||||
arch: vit_giant2
|
||||
patch_size: 14
|
||||
drop_path_rate: 0.4
|
||||
ffn_layer: swiglufused
|
||||
block_chunks: 4
|
||||
teacher:
|
||||
momentum_teacher: 0.994
|
||||
optim:
|
||||
epochs: 500
|
||||
weight_decay_end: 0.2
|
||||
base_lr: 2.0e-04 # learning rate for a batch size of 1024
|
||||
warmup_epochs: 80
|
||||
layerwise_decay: 1.0
|
||||
crops:
|
||||
local_crops_size: 98
|
||||
@@ -0,0 +1,26 @@
|
||||
dino:
|
||||
head_n_prototypes: 131072
|
||||
head_bottleneck_dim: 384
|
||||
ibot:
|
||||
separate_head: true
|
||||
head_n_prototypes: 131072
|
||||
train:
|
||||
batch_size_per_gpu: 32
|
||||
dataset_path: ImageNet22k
|
||||
centering: sinkhorn_knopp
|
||||
student:
|
||||
arch: vit_large
|
||||
patch_size: 14
|
||||
drop_path_rate: 0.4
|
||||
ffn_layer: swiglufused
|
||||
block_chunks: 4
|
||||
teacher:
|
||||
momentum_teacher: 0.994
|
||||
optim:
|
||||
epochs: 500
|
||||
weight_decay_end: 0.2
|
||||
base_lr: 2.0e-04 # learning rate for a batch size of 1024
|
||||
warmup_epochs: 80
|
||||
layerwise_decay: 1.0
|
||||
crops:
|
||||
local_crops_size: 98
|
||||
@@ -0,0 +1,6 @@
|
||||
# this corresponds to the default config
|
||||
train:
|
||||
dataset_path: ImageNet:split=TRAIN
|
||||
batch_size_per_gpu: 64
|
||||
student:
|
||||
block_chunks: 4
|
||||
Reference in New Issue
Block a user