Initial media depth project backup
This commit is contained in:
@@ -0,0 +1,5 @@
|
||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the license found in the
|
||||
# LICENSE file in the root directory of this source tree.
|
||||
@@ -0,0 +1,60 @@
|
||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the license found in the
|
||||
# LICENSE file in the root directory of this source tree.
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
|
||||
from dinov2.eval.knn import get_args_parser as get_knn_args_parser
|
||||
from dinov2.logging import setup_logging
|
||||
from dinov2.run.submit import get_args_parser, submit_jobs
|
||||
|
||||
|
||||
logger = logging.getLogger("dinov2")
|
||||
|
||||
|
||||
class Evaluator:
    """Callable task that runs the DINOv2 k-NN evaluation with the given args.

    An instance is built from the parsed arguments and invoked with no
    arguments; ``checkpoint`` supplies a resubmission wrapping a new
    instance that carries the same arguments.
    """

    def __init__(self, args):
        # Parsed arguments; ``output_dir`` is rewritten by ``_setup_args``.
        self.args = args

    def __call__(self):
        """Resolve job-specific arguments, then run the k-NN evaluation."""
        # Imported lazily, at call time rather than at module import time.
        from dinov2.eval.knn import main as run_knn

        self._setup_args()
        run_knn(self.args)

    def checkpoint(self):
        """Return a ``DelayedSubmission`` wrapping a new task with the same args.

        NOTE(review): presumably invoked by submitit when the job is
        preempted or times out -- confirm against the submitit docs.
        """
        import submitit

        logger.info(f"Requeuing {self.args}")
        requeued = self.__class__(self.args)
        return submitit.helpers.DelayedSubmission(requeued)

    def _setup_args(self):
        """Substitute the job id for ``%j`` in ``output_dir`` and log the setup."""
        import submitit

        job_env = submitit.JobEnvironment()
        job_id = str(job_env.job_id)
        self.args.output_dir = self.args.output_dir.replace("%j", job_id)
        logger.info(f"Process group: {job_env.num_tasks} tasks, rank: {job_env.global_rank}")
        logger.info(f"Args: {self.args}")
|
||||
|
||||
|
||||
def main():
    """Parse launcher and k-NN evaluation arguments, then submit the job.

    Returns:
        0 after ``submit_jobs`` has returned.

    Raises:
        FileNotFoundError: If ``args.config_file`` does not exist.
    """
    description = "Submitit launcher for DINOv2 k-NN evaluation"
    # The evaluation parser is used as a parent, so it is built without its
    # own help option; the launcher parser provides it.
    knn_args_parser = get_knn_args_parser(add_help=False)
    parents = [knn_args_parser]
    args_parser = get_args_parser(description=description, parents=parents)
    args = args_parser.parse_args()

    setup_logging()

    # Explicit check rather than `assert`: assertions are removed under
    # `python -O`, which would let submission proceed without a config file.
    if not os.path.exists(args.config_file):
        raise FileNotFoundError("Configuration file does not exist!")
    submit_jobs(Evaluator, args, name="dinov2:knn")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())
|
||||
@@ -0,0 +1,60 @@
|
||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the license found in the
|
||||
# LICENSE file in the root directory of this source tree.
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
|
||||
from dinov2.eval.linear import get_args_parser as get_linear_args_parser
|
||||
from dinov2.logging import setup_logging
|
||||
from dinov2.run.submit import get_args_parser, submit_jobs
|
||||
|
||||
|
||||
logger = logging.getLogger("dinov2")
|
||||
|
||||
|
||||
class Evaluator:
    """Callable task that runs the DINOv2 linear evaluation with the given args.

    An instance is built from the parsed arguments and invoked with no
    arguments; ``checkpoint`` supplies a resubmission wrapping a new
    instance that carries the same arguments.
    """

    def __init__(self, args):
        # Parsed arguments; ``output_dir`` is rewritten by ``_setup_args``.
        self.args = args

    def __call__(self):
        """Resolve job-specific arguments, then run the linear evaluation."""
        # Imported lazily, at call time rather than at module import time.
        from dinov2.eval.linear import main as run_linear

        self._setup_args()
        run_linear(self.args)

    def checkpoint(self):
        """Return a ``DelayedSubmission`` wrapping a new task with the same args.

        NOTE(review): presumably invoked by submitit when the job is
        preempted or times out -- confirm against the submitit docs.
        """
        import submitit

        logger.info(f"Requeuing {self.args}")
        requeued = self.__class__(self.args)
        return submitit.helpers.DelayedSubmission(requeued)

    def _setup_args(self):
        """Substitute the job id for ``%j`` in ``output_dir`` and log the setup."""
        import submitit

        job_env = submitit.JobEnvironment()
        job_id = str(job_env.job_id)
        self.args.output_dir = self.args.output_dir.replace("%j", job_id)
        logger.info(f"Process group: {job_env.num_tasks} tasks, rank: {job_env.global_rank}")
        logger.info(f"Args: {self.args}")
|
||||
|
||||
|
||||
def main():
    """Parse launcher and linear evaluation arguments, then submit the job.

    Returns:
        0 after ``submit_jobs`` has returned.

    Raises:
        FileNotFoundError: If ``args.config_file`` does not exist.
    """
    description = "Submitit launcher for DINOv2 linear evaluation"
    # The evaluation parser is used as a parent, so it is built without its
    # own help option; the launcher parser provides it.
    linear_args_parser = get_linear_args_parser(add_help=False)
    parents = [linear_args_parser]
    args_parser = get_args_parser(description=description, parents=parents)
    args = args_parser.parse_args()

    setup_logging()

    # Explicit check rather than `assert`: assertions are removed under
    # `python -O`, which would let submission proceed without a config file.
    if not os.path.exists(args.config_file):
        raise FileNotFoundError("Configuration file does not exist!")
    submit_jobs(Evaluator, args, name="dinov2:linear")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())
|
||||
@@ -0,0 +1,60 @@
|
||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the license found in the
|
||||
# LICENSE file in the root directory of this source tree.
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
|
||||
from dinov2.eval.log_regression import get_args_parser as get_log_regression_args_parser
|
||||
from dinov2.logging import setup_logging
|
||||
from dinov2.run.submit import get_args_parser, submit_jobs
|
||||
|
||||
|
||||
logger = logging.getLogger("dinov2")
|
||||
|
||||
|
||||
class Evaluator:
    """Callable task that runs the DINOv2 logistic-regression evaluation.

    An instance is built from the parsed arguments and invoked with no
    arguments; ``checkpoint`` supplies a resubmission wrapping a new
    instance that carries the same arguments.
    """

    def __init__(self, args):
        # Parsed arguments; ``output_dir`` is rewritten by ``_setup_args``.
        self.args = args

    def __call__(self):
        """Resolve job-specific arguments, then run the evaluation."""
        # Imported lazily, at call time rather than at module import time.
        from dinov2.eval.log_regression import main as run_log_regression

        self._setup_args()
        run_log_regression(self.args)

    def checkpoint(self):
        """Return a ``DelayedSubmission`` wrapping a new task with the same args.

        NOTE(review): presumably invoked by submitit when the job is
        preempted or times out -- confirm against the submitit docs.
        """
        import submitit

        logger.info(f"Requeuing {self.args}")
        requeued = self.__class__(self.args)
        return submitit.helpers.DelayedSubmission(requeued)

    def _setup_args(self):
        """Substitute the job id for ``%j`` in ``output_dir`` and log the setup."""
        import submitit

        job_env = submitit.JobEnvironment()
        job_id = str(job_env.job_id)
        self.args.output_dir = self.args.output_dir.replace("%j", job_id)
        logger.info(f"Process group: {job_env.num_tasks} tasks, rank: {job_env.global_rank}")
        logger.info(f"Args: {self.args}")
|
||||
|
||||
|
||||
def main():
    """Parse launcher and logistic-regression arguments, then submit the job.

    Returns:
        0 after ``submit_jobs`` has returned.

    Raises:
        FileNotFoundError: If ``args.config_file`` does not exist.
    """
    description = "Submitit launcher for DINOv2 logistic evaluation"
    # The evaluation parser is used as a parent, so it is built without its
    # own help option; the launcher parser provides it.
    log_regression_args_parser = get_log_regression_args_parser(add_help=False)
    parents = [log_regression_args_parser]
    args_parser = get_args_parser(description=description, parents=parents)
    args = args_parser.parse_args()

    setup_logging()

    # Explicit check rather than `assert`: assertions are removed under
    # `python -O`, which would let submission proceed without a config file.
    if not os.path.exists(args.config_file):
        raise FileNotFoundError("Configuration file does not exist!")
    submit_jobs(Evaluator, args, name="dinov2:logreg")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())
|
||||
@@ -0,0 +1,123 @@
|
||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the license found in the
|
||||
# LICENSE file in the root directory of this source tree.
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import List, Optional
|
||||
|
||||
import submitit
|
||||
|
||||
from dinov2.utils.cluster import (
|
||||
get_slurm_executor_parameters,
|
||||
get_slurm_partition,
|
||||
get_user_checkpoint_path,
|
||||
)
|
||||
|
||||
|
||||
logger = logging.getLogger("dinov2")
|
||||
|
||||
|
||||
def get_args_parser(
    description: Optional[str] = None,
    parents: Optional[List[argparse.ArgumentParser]] = None,
    add_help: bool = True,
) -> argparse.ArgumentParser:
    """Build the argument parser holding the job-submission options.

    Args:
        description: Text passed to ``ArgumentParser`` as its description.
        parents: Parsers whose arguments are inherited; ``None`` means none.
        add_help: Forwarded to ``ArgumentParser``.

    Returns:
        A parser exposing ``--ngpus``, ``--nodes``, ``--timeout``,
        ``--partition``, ``--use-volta32``, ``--comment`` and ``--exclude``
        (plus aliases) on top of whatever the parents define.
    """
    # Default for --partition comes from the cluster helper.
    default_partition = get_slurm_partition()

    parser = argparse.ArgumentParser(
        description=description,
        parents=parents or [],
        add_help=add_help,
    )

    # (option strings, add_argument keyword arguments), in registration order.
    option_specs = (
        (
            ("--ngpus", "--gpus", "--gpus-per-node"),
            {"default": 8, "type": int, "help": "Number of GPUs to request on each node"},
        ),
        (
            ("--nodes", "--nnodes"),
            {"default": 2, "type": int, "help": "Number of nodes to request"},
        ),
        (
            ("--timeout",),
            {"default": 2800, "type": int, "help": "Duration of the job"},
        ),
        (
            ("--partition",),
            {"default": default_partition, "type": str, "help": "Partition where to submit"},
        ),
        (
            ("--use-volta32",),
            {"action": "store_true", "help": "Request V100-32GB GPUs"},
        ),
        (
            ("--comment",),
            {"default": "", "type": str, "help": "Comment to pass to scheduler, e.g. priority message"},
        ),
        (
            ("--exclude",),
            {"default": "", "type": str, "help": "Nodes to exclude"},
        ),
    )
    for option_strings, options in option_specs:
        parser.add_argument(*option_strings, **options)

    return parser
|
||||
|
||||
|
||||
def get_shared_folder() -> Path:
    """Return the ``experiments`` directory under the user checkpoint path.

    The directory is created if it does not exist yet; its parent is not.

    Raises:
        RuntimeError: If ``get_user_checkpoint_path()`` returns ``None``.
    """
    checkpoint_root = get_user_checkpoint_path()
    if checkpoint_root is not None:
        experiments_dir = checkpoint_root / "experiments"
        experiments_dir.mkdir(exist_ok=True)
        return experiments_dir
    raise RuntimeError("Path to user checkpoint cannot be determined")
|
||||
|
||||
|
||||
def submit_jobs(task_class, args, name: str):
    """Instantiate ``task_class`` with ``args`` and submit it through submitit.

    Args:
        task_class: Class called as ``task_class(args)`` to build the task.
        args: Parsed arguments; reads ``output_dir``, ``use_volta32``,
            ``comment``, ``exclude``, ``nodes``, ``ngpus``, ``timeout`` and
            ``partition``. ``output_dir`` is filled in when it is empty.
        name: Job name handed to the executor.
    """
    if not args.output_dir:
        # Fall back to a per-job folder; "%j" is the job-id placeholder.
        args.output_dir = str(get_shared_folder() / "%j")

    # NOTE(review): when the placeholder is present this creates a directory
    # literally named "%j" -- confirm whether that is intended.
    Path(args.output_dir).mkdir(parents=True, exist_ok=True)
    executor = submitit.AutoExecutor(folder=args.output_dir, slurm_max_num_timeout=30)

    # Optional scheduler settings: only forwarded when actually requested.
    optional_settings = {
        "slurm_constraint": "volta32gb" if args.use_volta32 else None,
        "slurm_comment": args.comment or None,
        "slurm_exclude": args.exclude or None,
    }
    kwargs = {key: value for key, value in optional_settings.items() if value is not None}

    executor_params = get_slurm_executor_parameters(
        nodes=args.nodes,
        num_gpus_per_node=args.ngpus,
        timeout_min=args.timeout,  # max is 60 * 72
        slurm_signal_delay_s=120,
        slurm_partition=args.partition,
        **kwargs,
    )
    executor.update_parameters(name=name, **executor_params)

    job = executor.submit(task_class(args))
    logger.info(f"Submitted job_id: {job.job_id}")

    # Report the absolute location with the placeholder resolved.
    absolute_dir = os.path.abspath(args.output_dir)
    str_output_dir = absolute_dir.replace("%j", str(job.job_id))
    logger.info(f"Logs and checkpoints will be saved at: {str_output_dir}")
|
||||
@@ -0,0 +1,60 @@
|
||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the license found in the
|
||||
# LICENSE file in the root directory of this source tree.
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
|
||||
from dinov2.logging import setup_logging
|
||||
from dinov2.train import get_args_parser as get_train_args_parser
|
||||
from dinov2.run.submit import get_args_parser, submit_jobs
|
||||
|
||||
|
||||
logger = logging.getLogger("dinov2")
|
||||
|
||||
|
||||
class Trainer:
    """Callable task that runs DINOv2 training with the given args.

    An instance is built from the parsed arguments and invoked with no
    arguments; ``checkpoint`` supplies a resubmission wrapping a new
    instance that carries the same arguments.
    """

    def __init__(self, args):
        # Parsed arguments; ``output_dir`` is rewritten by ``_setup_args``.
        self.args = args

    def __call__(self):
        """Resolve job-specific arguments, then run training."""
        # Imported lazily, at call time rather than at module import time.
        from dinov2.train import main as run_training

        self._setup_args()
        run_training(self.args)

    def checkpoint(self):
        """Return a ``DelayedSubmission`` wrapping a new task with the same args.

        NOTE(review): presumably invoked by submitit when the job is
        preempted or times out -- confirm against the submitit docs.
        """
        import submitit

        logger.info(f"Requeuing {self.args}")
        requeued = self.__class__(self.args)
        return submitit.helpers.DelayedSubmission(requeued)

    def _setup_args(self):
        """Substitute the job id for ``%j`` in ``output_dir`` and log the setup."""
        import submitit

        job_env = submitit.JobEnvironment()
        job_id = str(job_env.job_id)
        self.args.output_dir = self.args.output_dir.replace("%j", job_id)
        logger.info(f"Process group: {job_env.num_tasks} tasks, rank: {job_env.global_rank}")
        logger.info(f"Args: {self.args}")
|
||||
|
||||
|
||||
def main():
    """Parse launcher and training arguments, then submit the training job.

    Returns:
        0 after ``submit_jobs`` has returned.

    Raises:
        FileNotFoundError: If ``args.config_file`` does not exist.
    """
    description = "Submitit launcher for DINOv2 training"
    # The training parser is used as a parent, so it is built without its
    # own help option; the launcher parser provides it.
    train_args_parser = get_train_args_parser(add_help=False)
    parents = [train_args_parser]
    args_parser = get_args_parser(description=description, parents=parents)
    args = args_parser.parse_args()

    setup_logging()

    # Explicit check rather than `assert`: assertions are removed under
    # `python -O`, which would let submission proceed without a config file.
    if not os.path.exists(args.config_file):
        raise FileNotFoundError("Configuration file does not exist!")
    submit_jobs(Trainer, args, name="dinov2:train")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())
|
||||
Reference in New Issue
Block a user