Initial media depth project backup

This commit is contained in:
Codex
2026-05-20 12:25:12 +08:00
commit 4a0aebb2bd
358 changed files with 182095 additions and 0 deletions

View File

@@ -0,0 +1,23 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import pathlib
from omegaconf import OmegaConf
def load_config(config_name: str):
    """Load a YAML config that lives next to this module.

    ``".yaml"`` is appended to ``config_name`` and the resulting file is
    looked up in the resolved directory containing this source file.

    Args:
        config_name: Config file name without the ``.yaml`` extension.

    Returns:
        The object produced by ``OmegaConf.load`` for that file.
    """
    config_dir = pathlib.Path(__file__).parent.resolve()
    return OmegaConf.load(config_dir / (config_name + ".yaml"))
# Default configuration, read from "ssl_default_config.yaml" (next to this
# module) via load_config when the module is imported.
dinov2_default_config = load_config("ssl_default_config")
def load_and_merge_config(config_name: str):
    """Return the default config merged with the named config.

    A new config object is first created from ``dinov2_default_config`` with
    ``OmegaConf.create``; the config loaded by ``load_config(config_name)``
    is then passed as the second argument to ``OmegaConf.merge``.

    Args:
        config_name: Config file name without the ``.yaml`` extension.

    Returns:
        The result of ``OmegaConf.merge(defaults, loaded)``.
    """
    return OmegaConf.merge(
        OmegaConf.create(dinov2_default_config),
        load_config(config_name),
    )

View File

@@ -0,0 +1,6 @@
# Settings for the vit_base student architecture.
# NOTE(review): presumably merged over the default config by
# load_and_merge_config — confirm against the caller.
student:
  arch: vit_base
  patch_size: 14
crops:
  global_crops_size: 518 # this is to set up the position embeddings properly
  local_crops_size: 98

View File

@@ -0,0 +1,7 @@
# Settings for the vit_giant2 student architecture (swiglufused FFN layer).
# NOTE(review): presumably merged over the default config by
# load_and_merge_config — confirm against the caller.
student:
  arch: vit_giant2
  patch_size: 14
  ffn_layer: swiglufused
crops:
  global_crops_size: 518 # this is to set up the position embeddings properly
  local_crops_size: 98

View File

@@ -0,0 +1,6 @@
# Settings for the vit_large student architecture.
# NOTE(review): presumably merged over the default config by
# load_and_merge_config — confirm against the caller.
student:
  arch: vit_large
  patch_size: 14
crops:
  global_crops_size: 518 # this is to set up the position embeddings properly
  local_crops_size: 98

View File

@@ -0,0 +1,6 @@
# Settings for the vit_small student architecture.
# NOTE(review): presumably merged over the default config by
# load_and_merge_config — confirm against the caller.
student:
  arch: vit_small
  patch_size: 14
crops:
  global_crops_size: 518 # this is to set up the position embeddings properly
  local_crops_size: 98

View File

@@ -0,0 +1,115 @@
# Baseline configuration values, grouped by top-level section.
# NOTE(review): presumably this is the "ssl_default_config" file that the
# loader module reads as its defaults — confirm the filename.
MODEL:
  WEIGHTS: ''
# Per-module sharding and mixed-precision settings for teacher and student.
compute_precision:
  grad_scaler: true
  teacher:
    backbone:
      sharding_strategy: SHARD_GRAD_OP
      mixed_precision:
        param_dtype: fp16
        reduce_dtype: fp16
        buffer_dtype: fp32
    dino_head:
      sharding_strategy: SHARD_GRAD_OP
      mixed_precision:
        param_dtype: fp16
        reduce_dtype: fp16
        buffer_dtype: fp32
    ibot_head:
      sharding_strategy: SHARD_GRAD_OP
      mixed_precision:
        param_dtype: fp16
        reduce_dtype: fp16
        buffer_dtype: fp32
  student:
    backbone:
      sharding_strategy: SHARD_GRAD_OP
      mixed_precision:
        param_dtype: fp16
        reduce_dtype: fp16
        buffer_dtype: fp32
    # Unlike the teacher heads, the student heads use reduce_dtype fp32.
    dino_head:
      sharding_strategy: SHARD_GRAD_OP
      mixed_precision:
        param_dtype: fp16
        reduce_dtype: fp32
        buffer_dtype: fp32
    ibot_head:
      sharding_strategy: SHARD_GRAD_OP
      mixed_precision:
        param_dtype: fp16
        reduce_dtype: fp32
        buffer_dtype: fp32
dino:
  loss_weight: 1.0
  head_n_prototypes: 65536
  head_bottleneck_dim: 256
  head_nlayers: 3
  head_hidden_dim: 2048
  koleo_loss_weight: 0.1
ibot:
  loss_weight: 1.0
  mask_sample_probability: 0.5
  mask_ratio_min_max:
  - 0.1
  - 0.5
  separate_head: false
  head_n_prototypes: 65536
  head_bottleneck_dim: 256
  head_nlayers: 3
  head_hidden_dim: 2048
train:
  batch_size_per_gpu: 64
  dataset_path: ImageNet:split=TRAIN
  output_dir: .
  saveckp_freq: 20
  seed: 0
  num_workers: 10
  OFFICIAL_EPOCH_LENGTH: 1250
  cache_dataset: true
  centering: "centering" # or "sinkhorn_knopp"
student:
  arch: vit_large
  patch_size: 16
  drop_path_rate: 0.3
  layerscale: 1.0e-05
  drop_path_uniform: true
  pretrained_weights: ''
  ffn_layer: "mlp"
  block_chunks: 0
  qkv_bias: true
  proj_bias: true
  ffn_bias: true
teacher:
  momentum_teacher: 0.992
  final_momentum_teacher: 1
  warmup_teacher_temp: 0.04
  teacher_temp: 0.07
  warmup_teacher_temp_epochs: 30
optim:
  epochs: 100
  weight_decay: 0.04
  weight_decay_end: 0.4
  base_lr: 0.004 # learning rate for a batch size of 1024
  lr: 0. # will be set after applying scaling rule
  warmup_epochs: 10
  min_lr: 1.0e-06
  clip_grad: 3.0
  freeze_last_layer_epochs: 1
  scaling_rule: sqrt_wrt_1024
  patch_embed_lr_mult: 0.2
  layerwise_decay: 0.9
  adamw_beta1: 0.9
  adamw_beta2: 0.999
crops:
  global_crops_scale:
  - 0.32
  - 1.0
  local_crops_number: 8
  local_crops_scale:
  - 0.05
  - 0.32
  global_crops_size: 224
  local_crops_size: 96
evaluation:
  eval_period_iterations: 12500

View File

@@ -0,0 +1,26 @@
# Training settings for the vit_giant2 student architecture.
# NOTE(review): presumably merged over the default config by
# load_and_merge_config — confirm against the caller.
dino:
  head_n_prototypes: 131072
  head_bottleneck_dim: 384
ibot:
  separate_head: true
  head_n_prototypes: 131072
train:
  batch_size_per_gpu: 12
  dataset_path: ImageNet22k
  centering: sinkhorn_knopp
student:
  arch: vit_giant2
  patch_size: 14
  drop_path_rate: 0.4
  ffn_layer: swiglufused
  block_chunks: 4
teacher:
  momentum_teacher: 0.994
optim:
  epochs: 500
  weight_decay_end: 0.2
  base_lr: 2.0e-04 # learning rate for a batch size of 1024
  warmup_epochs: 80
  layerwise_decay: 1.0
crops:
  local_crops_size: 98

View File

@@ -0,0 +1,26 @@
# Training settings for the vit_large student architecture.
# NOTE(review): presumably merged over the default config by
# load_and_merge_config — confirm against the caller.
dino:
  head_n_prototypes: 131072
  head_bottleneck_dim: 384
ibot:
  separate_head: true
  head_n_prototypes: 131072
train:
  batch_size_per_gpu: 32
  dataset_path: ImageNet22k
  centering: sinkhorn_knopp
student:
  arch: vit_large
  patch_size: 14
  drop_path_rate: 0.4
  ffn_layer: swiglufused
  block_chunks: 4
teacher:
  momentum_teacher: 0.994
optim:
  epochs: 500
  weight_decay_end: 0.2
  base_lr: 2.0e-04 # learning rate for a batch size of 1024
  warmup_epochs: 80
  layerwise_decay: 1.0
crops:
  local_crops_size: 98

View File

@@ -0,0 +1,6 @@
# this corresponds to the default config
train:
  dataset_path: ImageNet:split=TRAIN
  batch_size_per_gpu: 64
student:
  block_chunks: 4