# @package _global_
defaults:
  - _self_

# This config is the base configuration for all evaluations. Amongst other things, it defines:
# - the model
# - the image transforms
# - the post processors
# - cluster configuration (only relevant for slurm-based evals, ignored otherwise)
#
# Most of the parameters should be kept as-is. The main modifications you may want to make are:
# - the cluster configuration, to adjust partitions/qos to your system
# - the flag gather_pred_via_filesys if your RAM is tight
# - num_val_workers if your number of cores is small (should be roughly number of cores / number of gpus)
# - the paths below

# ============================================================================
# Paths Configuration (Change this to your own paths)
# ============================================================================
paths:
  # If you leave the checkpoint path to null, the model will be downloaded from hugging-face. Otherwise provide a path
  checkpoint_path: null
  # the experiments will be subfolders of this
  base_experiment_log_dir: null
  # base path to the annotation folder for gold (refer to the readmes on how to download)
  base_annotation_path: null
  # base path to the annotation folder for silver (refer to the readmes on how to download)
  base_annotation_path_silver: null
  # path to the metaclip images, used for SA-Co gold (refer to the readme for instructions). Can be null if you don't intend on evaluating on this dataset.
  metaclip_img_path: null
  # path to the sa1b images, used for SA-Co gold (refer to the readme for instructions). Can be null if you don't intend on evaluating on this dataset.
sa1b_img_path: # path to the SA-Co/silver images silver_img_path: bpe_path: # This should be under sam3/assets/bpe_simple_vocab_16e6.txt.gz # ============================================================================ # Different helper parameters and functions # ============================================================================ scratch: use_presence_eval: True base_val_transform: - _target_: sam3.train.transforms.basic_for_api.ComposeAPI transforms: ######## transforms for validation (begin) ######## - _target_: sam3.train.transforms.basic_for_api.RandomResizeAPI sizes: ${scratch.resolution} # originally `resolution: 1024` max_size: _target_: sam3.train.transforms.basic.get_random_resize_max_size size: ${scratch.resolution} # originally `resolution: 1024` square: true consistent_transform: False ######## transforms for validation (end) ######## - _target_: sam3.train.transforms.basic_for_api.ToTensorAPI - _target_: sam3.train.transforms.basic_for_api.NormalizeAPI mean: ${scratch.val_norm_mean} std: ${scratch.val_norm_std} loss: null # Model parameters d_model: 256 input_box_embedding_dim: ${add:${scratch.d_model},2} # Box processing original_box_postprocessor: _target_: sam3.eval.postprocessors.PostProcessImage max_dets_per_img: -1 # infinite detections use_original_ids: true use_original_sizes_box: true use_presence: ${scratch.use_presence_eval} box_postprocessor: _target_: sam3.eval.postprocessors.PostProcessImage max_dets_per_img: -1 #infinite detections use_original_ids: false use_original_sizes_box: false use_presence: ${scratch.use_presence_eval} box_postprocessor_thresholded: _target_: sam3.eval.postprocessors.PostProcessImage max_dets_per_img: -1 #infinite detections use_original_ids: false use_original_sizes_box: false detection_threshold: 0.3 use_presence: ${scratch.use_presence_eval} mask_postprocessor_thresholded: _target_: sam3.eval.postprocessors.PostProcessImage max_dets_per_img: -1 #infinite detections iou_type: "segm" use_original_ids: 
false use_original_sizes_box: false use_original_sizes_mask: true convert_mask_to_rle: True detection_threshold: 0.3 use_presence: ${scratch.use_presence_eval} # Image processing parameters resolution: 1008 max_ann_per_img: 200 # Normalization parameters train_norm_mean: [0.5, 0.5, 0.5] train_norm_std: [0.5, 0.5, 0.5] val_norm_mean: [0.5, 0.5, 0.5] val_norm_std: [0.5, 0.5, 0.5] # Training parameters train_batch_size: 1 val_batch_size: 1 num_train_workers: 0 num_val_workers: 10 # change this depending on the number of cpu cores available max_data_epochs: 20 target_epoch_size: 1500 hybrid_repeats: 1 context_length: 2 # All reduce - this controls how the predictions are sent back to node 0. # If you have a lot of ram, CPU gather is faster. Otherwise, we provide a fallback through filesystem (eg NFS) # Switch to true if you get cpu ooms during gather. gather_pred_via_filesys: false # Learning rate and scheduler parameters (unused for eval) lr_scale: 0.1 lr_transformer: ${times:8e-4,${scratch.lr_scale}} lr_vision_backbone: ${times:2.5e-4,${scratch.lr_scale}} lr_language_backbone: ${times:5e-5,${scratch.lr_scale}} lrd_vision_backbone: 0.9 # (lower for in-domain adn higher for ood) wd: 0.1 scheduler_timescale: 20 scheduler_warmup: 20 scheduler_cooldown: 20 # ============================================================================ # Trainer Configuration # ============================================================================ trainer: _target_: sam3.train.trainer.Trainer skip_saving_ckpts: true empty_gpu_mem_cache_after_eval: True skip_first_val: True max_epochs: ${scratch.max_data_epochs} accelerator: cuda seed_value: 123 val_epoch_freq: 10 mode: val distributed: backend: nccl find_unused_parameters: True gradient_as_bucket_view: True loss: all: _target_: sam3.train.loss.sam3_loss.DummyLoss default: _target_: sam3.train.loss.sam3_loss.DummyLoss data: train: null val: null model: _target_: sam3.model_builder.build_sam3_image_model bpe_path: ${paths.bpe_path} 
device: cpus eval_mode: true enable_segmentation: true # Warning: Enable this if using segmentation. checkpoint_path: ${paths.checkpoint_path} meters: val: null optim: amp: enabled: True amp_dtype: bfloat16 optimizer: _target_: torch.optim.AdamW gradient_clip: _target_: sam3.train.optim.optimizer.GradientClipper max_norm: 0.1 norm_type: 2 param_group_modifiers: - _target_: sam3.train.optim.optimizer.layer_decay_param_modifier _partial_: True layer_decay_value: ${scratch.lrd_vision_backbone} apply_to: 'backbone.vision_backbone.trunk' overrides: - pattern: '*pos_embed*' value: 1.0 options: lr: - scheduler: # transformer and class_embed _target_: sam3.train.optim.schedulers.InverseSquareRootParamScheduler base_lr: ${scratch.lr_transformer} timescale: ${scratch.scheduler_timescale} warmup_steps: ${scratch.scheduler_warmup} cooldown_steps: ${scratch.scheduler_cooldown} - scheduler: _target_: sam3.train.optim.schedulers.InverseSquareRootParamScheduler base_lr: ${scratch.lr_vision_backbone} timescale: ${scratch.scheduler_timescale} warmup_steps: ${scratch.scheduler_warmup} cooldown_steps: ${scratch.scheduler_cooldown} param_names: - 'backbone.vision_backbone.*' - scheduler: _target_: sam3.train.optim.schedulers.InverseSquareRootParamScheduler base_lr: ${scratch.lr_language_backbone} timescale: ${scratch.scheduler_timescale} warmup_steps: ${scratch.scheduler_warmup} cooldown_steps: ${scratch.scheduler_cooldown} param_names: - 'backbone.language_backbone.*' weight_decay: - scheduler: _target_: fvcore.common.param_scheduler.ConstantParamScheduler value: ${scratch.wd} - scheduler: _target_: fvcore.common.param_scheduler.ConstantParamScheduler value: 0.0 param_names: - '*bias*' module_cls_names: ['torch.nn.LayerNorm'] checkpoint: save_dir: ${launcher.experiment_log_dir}/checkpoints save_freq: 0 # 0 only last checkpoint is saved. 
logging: tensorboard_writer: _target_: sam3.train.utils.logger.make_tensorboard_logger log_dir: ${launcher.experiment_log_dir}/tensorboard flush_secs: 120 should_log: True wandb_writer: null log_dir: ${launcher.experiment_log_dir}/logs/ log_freq: 10 # ============================================================================ # Launcher and Submitit Configuration # ============================================================================ launcher: num_nodes: 4 gpus_per_node: 8 experiment_log_dir: ${paths.experiment_log_dir} multiprocessing_context: forkserver submitit: account: null # Add your SLURM account if use_cluster == 1 partition: null qos: null # Add your QoS if use_cluster == 1 timeout_hour: 72 use_cluster: True cpus_per_task: 10 port_range: [10000, 65000] constraint: null