# @package _global_
defaults:
  - _self_

# This config is the base configuration for all evaluations. Amongst other things, it defines:
# - the model
# - the image transforms
# - the post processors
# - the cluster configuration (only relevant for SLURM-based evals, ignored otherwise)
#
# Most of the parameters should be kept as-is. The main modifications you may want to make are:
# - the cluster configuration, to adjust partitions/qos to your system
# - the flag gather_pred_via_filesys if your RAM is tight
# - num_val_workers if your number of cores is small (should be roughly number of cores / number of GPUs)
# - the paths below (a commented override example follows this block)
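#
# These values are ordinary Hydra/OmegaConf keys, so they can be changed without editing this
# file. A minimal sketch using Hydra's compose API (commented out so this file stays valid
# YAML); the config_path and config_name below are placeholders, not the repo's actual ones:
#
#   from hydra import compose, initialize
#
#   with initialize(version_base=None, config_path="."):      # hypothetical config directory
#       cfg = compose(
#           config_name="eval_base",                          # hypothetical name for this file
#           overrides=[
#               "paths.checkpoint_path=/my/ckpts/sam3.pt",    # placeholder path
#               "scratch.num_val_workers=4",
#               "scratch.gather_pred_via_filesys=true",
#           ],
#       )
#   print(cfg.scratch.num_val_workers)  # -> 4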

# ============================================================================
# Paths Configuration (change these to your own paths)
# ============================================================================
paths:
  # If you leave the checkpoint path as null, the model will be downloaded from Hugging Face. Otherwise, provide a path.
  checkpoint_path: null
  # The experiments will be subfolders of this directory.
  base_experiment_log_dir: <YOUR_EXPERIMENT_LOG_DIR>
  # Base path to the annotation folder for gold (refer to the READMEs on how to download).
  base_annotation_path: <YOUR_GOLD_GT_DIR>
  # Base path to the annotation folder for silver (refer to the READMEs on how to download).
  base_annotation_path_silver: <YOUR_SILVER_GT_DIR>
  # Path to the MetaCLIP images, used for SA-Co gold (refer to the README for instructions). Can be null if you don't intend to evaluate on this dataset.
  metaclip_img_path: <YOUR_METACLIP_IMG_DIR>
  # Path to the SA-1B images, used for SA-Co gold (refer to the README for instructions). Can be null if you don't intend to evaluate on this dataset.
  sa1b_img_path: <YOUR_SA1B_IMG_DIR>
  # Path to the SA-Co/silver images.
  silver_img_path: <YOUR_SILVER_IMG_DIR>
  bpe_path: <BPE_PATH> # This should be under sam3/assets/bpe_simple_vocab_16e6.txt.gz
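#
# Before launching, it can help to catch unfilled placeholders early. A small commented-out
# sanity check; the key names mirror the block above, the values are purely illustrative:
#
#   from pathlib import Path
#
#   my_paths = {
#       "base_experiment_log_dir": "<YOUR_EXPERIMENT_LOG_DIR>",
#       "base_annotation_path": "/data/saco_gold",            # placeholder value
#       "bpe_path": "sam3/assets/bpe_simple_vocab_16e6.txt.gz",
#   }
#   for name, p in my_paths.items():
#       if p is None or p.startswith("<"):
#           print(f"{name}: still a placeholder")
#       elif not Path(p).exists():
#           print(f"{name}: {p} does not exist")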

# ============================================================================
# Different helper parameters and functions
# ============================================================================
scratch:
  use_presence_eval: True
  base_val_transform:
    - _target_: sam3.train.transforms.basic_for_api.ComposeAPI
      transforms:
        ######## transforms for validation (begin) ########
        - _target_: sam3.train.transforms.basic_for_api.RandomResizeAPI
          sizes: ${scratch.resolution} # originally `resolution: 1024`
          max_size:
            _target_: sam3.train.transforms.basic.get_random_resize_max_size
            size: ${scratch.resolution} # originally `resolution: 1024`
            square: true
          consistent_transform: False
        ######## transforms for validation (end) ########
        - _target_: sam3.train.transforms.basic_for_api.ToTensorAPI
        - _target_: sam3.train.transforms.basic_for_api.NormalizeAPI
          mean: ${scratch.val_norm_mean}
          std: ${scratch.val_norm_std}
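  #
  # For intuition: the validation transform above resizes each image (driven by
  # scratch.resolution = 1008), converts it to a tensor, and normalizes with mean/std 0.5,
  # so pixel values end up roughly in [-1, 1]. A commented-out torchvision approximation
  # (not the sam3 transforms API, and it ignores the box/mask handling the *API classes do):
  #
  #   from PIL import Image
  #   from torchvision import transforms
  #
  #   resolution = 1008  # matches scratch.resolution
  #   preprocess = transforms.Compose([
  #       transforms.Resize(resolution, max_size=resolution + 1),  # rough stand-in for RandomResizeAPI
  #       transforms.ToTensor(),                                   # HWC uint8 -> CHW float in [0, 1]
  #       transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3),     # [0, 1] -> [-1, 1]
  #   ])
  #   img = preprocess(Image.open("example.jpg").convert("RGB"))   # (3, H, W) tensor, example file name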
  loss: null
  # Model parameters
  d_model: 256
  input_box_embedding_dim: ${add:${scratch.d_model},2}
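  #
  # ${add:...} here and ${times:...} further below are custom OmegaConf resolvers registered by
  # the training code. A commented-out sketch of how such resolvers behave (illustrative only;
  # the repo's own registration may differ, e.g. in the returned types):
  #
  #   from omegaconf import OmegaConf
  #
  #   OmegaConf.register_new_resolver("add", lambda *xs: sum(float(x) for x in xs), replace=True)
  #   OmegaConf.register_new_resolver("times", lambda a, b: float(a) * float(b), replace=True)
  #
  #   cfg = OmegaConf.create({"d_model": 256, "dim": "${add:${d_model},2}"})
  #   print(cfg.dim)  # 258.0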
  # Box processing
  original_box_postprocessor:
    _target_: sam3.eval.postprocessors.PostProcessImage
    max_dets_per_img: -1 # infinite detections
    use_original_ids: true
    use_original_sizes_box: true
    use_presence: ${scratch.use_presence_eval}
  box_postprocessor:
    _target_: sam3.eval.postprocessors.PostProcessImage
    max_dets_per_img: -1 # infinite detections
    use_original_ids: false
    use_original_sizes_box: false
    use_presence: ${scratch.use_presence_eval}
  box_postprocessor_thresholded:
    _target_: sam3.eval.postprocessors.PostProcessImage
    max_dets_per_img: -1 # infinite detections
    use_original_ids: false
    use_original_sizes_box: false
    detection_threshold: 0.3
    use_presence: ${scratch.use_presence_eval}
  mask_postprocessor_thresholded:
    _target_: sam3.eval.postprocessors.PostProcessImage
    max_dets_per_img: -1 # infinite detections
    iou_type: "segm"
    use_original_ids: false
    use_original_sizes_box: false
    use_original_sizes_mask: true
    convert_mask_to_rle: True
    detection_threshold: 0.3
    use_presence: ${scratch.use_presence_eval}
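  #
  # Conceptually, the thresholded postprocessors drop predictions whose confidence is below
  # detection_threshold and, when max_dets_per_img >= 0, keep only the top-scoring ones. A toy
  # commented-out illustration of that filtering step (not the PostProcessImage implementation):
  #
  #   import torch
  #
  #   def filter_predictions(scores, boxes, threshold=0.3, max_dets=-1):
  #       keep = scores >= threshold
  #       scores, boxes = scores[keep], boxes[keep]
  #       if max_dets >= 0:  # -1 means no cap
  #           order = scores.argsort(descending=True)[:max_dets]
  #           scores, boxes = scores[order], boxes[order]
  #       return scores, boxes
  #
  #   s, b = torch.tensor([0.9, 0.2, 0.5]), torch.rand(3, 4)
  #   print(filter_predictions(s, b)[0])  # tensor([0.9000, 0.5000])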
  # Image processing parameters
  resolution: 1008
  max_ann_per_img: 200
  # Normalization parameters
  train_norm_mean: [0.5, 0.5, 0.5]
  train_norm_std: [0.5, 0.5, 0.5]
  val_norm_mean: [0.5, 0.5, 0.5]
  val_norm_std: [0.5, 0.5, 0.5]
  # Training parameters
  train_batch_size: 1
  val_batch_size: 1
  num_train_workers: 0
  num_val_workers: 10 # change this depending on the number of CPU cores available
  max_data_epochs: 20
  target_epoch_size: 1500
  hybrid_repeats: 1
  context_length: 2
  # Prediction gather: this controls how the predictions are sent back to node 0.
  # If you have a lot of RAM, the in-memory CPU gather is faster. Otherwise, we provide a fallback through the filesystem (e.g. NFS).
  # Switch to true if you get CPU OOMs during the gather; see the commented sketch below for what the two modes do.
  gather_pred_via_filesys: false
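  #
  # The two gather modes, in spirit (a commented-out conceptual sketch, not the sam3 trainer
  # code; it assumes torch.distributed is already initialized). The in-memory path holds every
  # rank's predictions in CPU RAM, while the filesystem path stages per-rank files on shared
  # storage such as NFS and is slower but much lighter on memory:
  #
  #   import os, torch
  #   import torch.distributed as dist
  #
  #   def gather_in_memory(preds):                       # gather_pred_via_filesys: false
  #       bucket = [None] * dist.get_world_size()
  #       dist.all_gather_object(bucket, preds)          # needs enough CPU RAM on every rank
  #       return [p for rank_preds in bucket for p in rank_preds]
  #
  #   def gather_via_filesystem(preds, shared_dir):      # gather_pred_via_filesys: true
  #       torch.save(preds, os.path.join(shared_dir, f"preds_{dist.get_rank()}.pt"))
  #       dist.barrier()                                  # wait until every rank has written
  #       if dist.get_rank() != 0:
  #           return None
  #       return [p
  #               for r in range(dist.get_world_size())
  #               for p in torch.load(os.path.join(shared_dir, f"preds_{r}.pt"))]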
  # Learning rate and scheduler parameters (unused for eval)
  lr_scale: 0.1
  lr_transformer: ${times:8e-4,${scratch.lr_scale}}
  lr_vision_backbone: ${times:2.5e-4,${scratch.lr_scale}}
  lr_language_backbone: ${times:5e-5,${scratch.lr_scale}}
  lrd_vision_backbone: 0.9 # lower for in-domain, higher for out-of-domain
  wd: 0.1
  scheduler_timescale: 20
  scheduler_warmup: 20
  scheduler_cooldown: 20

# ============================================================================
# Trainer Configuration
# ============================================================================
trainer:
  _target_: sam3.train.trainer.Trainer
  skip_saving_ckpts: true
  empty_gpu_mem_cache_after_eval: True
  skip_first_val: True
  max_epochs: ${scratch.max_data_epochs}
  accelerator: cuda
  seed_value: 123
  val_epoch_freq: 10
  mode: val
  distributed:
    backend: nccl
    find_unused_parameters: True
    gradient_as_bucket_view: True
  loss:
    all:
      _target_: sam3.train.loss.sam3_loss.DummyLoss
    default:
      _target_: sam3.train.loss.sam3_loss.DummyLoss
  data:
    train: null
    val: null
  model:
    _target_: sam3.model_builder.build_sam3_image_model
    bpe_path: ${paths.bpe_path}
    device: cpu
    eval_mode: true
    enable_segmentation: true # Enable this if evaluating segmentation (mask) outputs.
    checkpoint_path: ${paths.checkpoint_path}
  meters:
    val: null
  optim:
    amp:
      enabled: True
      amp_dtype: bfloat16
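    #
    # With enabled: True and amp_dtype: bfloat16, the evaluation forward pass runs under
    # autocast. A minimal standalone commented-out illustration (requires a CUDA device;
    # the trainer wires this up itself):
    #
    #   import torch
    #
    #   model = torch.nn.Linear(16, 4).cuda()
    #   x = torch.randn(2, 16, device="cuda")
    #   with torch.autocast(device_type="cuda", dtype=torch.bfloat16):
    #       y = model(x)
    #   print(y.dtype)  # torch.bfloat16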
    optimizer:
      _target_: torch.optim.AdamW
    gradient_clip:
      _target_: sam3.train.optim.optimizer.GradientClipper
      max_norm: 0.1
      norm_type: 2
    param_group_modifiers:
      - _target_: sam3.train.optim.optimizer.layer_decay_param_modifier
        _partial_: True
        layer_decay_value: ${scratch.lrd_vision_backbone}
        apply_to: 'backbone.vision_backbone.trunk'
        overrides:
          - pattern: '*pos_embed*'
            value: 1.0
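    #
    # Layer-wise lr decay scales down the learning rate of earlier backbone blocks
    # geometrically: with layer_decay_value = 0.9, a block k levels below the top gets
    # roughly lr * 0.9^k, while *pos_embed* is pinned to 1.0 by the override above. A toy
    # commented-out illustration of the scaling rule (the actual grouping is done by
    # layer_decay_param_modifier):
    #
    #   def layer_scale(layer_id, num_layers, decay=0.9):
    #       return decay ** (num_layers - layer_id)
    #
    #   print([round(layer_scale(i, 4), 3) for i in range(5)])
    #   # [0.656, 0.729, 0.81, 0.9, 1.0]  -> later (deeper) blocks keep more of the base lr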
    options:
      lr:
        - scheduler: # transformer and class_embed
            _target_: sam3.train.optim.schedulers.InverseSquareRootParamScheduler
            base_lr: ${scratch.lr_transformer}
            timescale: ${scratch.scheduler_timescale}
            warmup_steps: ${scratch.scheduler_warmup}
            cooldown_steps: ${scratch.scheduler_cooldown}
        - scheduler:
            _target_: sam3.train.optim.schedulers.InverseSquareRootParamScheduler
            base_lr: ${scratch.lr_vision_backbone}
            timescale: ${scratch.scheduler_timescale}
            warmup_steps: ${scratch.scheduler_warmup}
            cooldown_steps: ${scratch.scheduler_cooldown}
          param_names:
            - 'backbone.vision_backbone.*'
        - scheduler:
            _target_: sam3.train.optim.schedulers.InverseSquareRootParamScheduler
            base_lr: ${scratch.lr_language_backbone}
            timescale: ${scratch.scheduler_timescale}
            warmup_steps: ${scratch.scheduler_warmup}
            cooldown_steps: ${scratch.scheduler_cooldown}
          param_names:
            - 'backbone.language_backbone.*'
      weight_decay:
        - scheduler:
            _target_: fvcore.common.param_scheduler.ConstantParamScheduler
            value: ${scratch.wd}
        - scheduler:
            _target_: fvcore.common.param_scheduler.ConstantParamScheduler
            value: 0.0
          param_names:
            - '*bias*'
          module_cls_names: ['torch.nn.LayerNorm']
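    #
    # The second weight_decay group above turns decay off for biases and LayerNorm parameters,
    # a common AdamW practice. A generic commented-out sketch of building such param groups in
    # plain PyTorch (not the repo's optimizer code):
    #
    #   import torch
    #
    #   def split_param_groups(model, wd=0.1):
    #       decay, no_decay = [], []
    #       for module in model.modules():
    #           for name, p in module.named_parameters(recurse=False):
    #               if not p.requires_grad:
    #                   continue
    #               if name.endswith("bias") or isinstance(module, torch.nn.LayerNorm):
    #                   no_decay.append(p)
    #               else:
    #                   decay.append(p)
    #       return [{"params": decay, "weight_decay": wd},
    #               {"params": no_decay, "weight_decay": 0.0}]
    #
    #   model = torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.LayerNorm(8))
    #   opt = torch.optim.AdamW(split_param_groups(model), lr=1e-4)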
  checkpoint:
    save_dir: ${launcher.experiment_log_dir}/checkpoints
    save_freq: 0 # 0 means only the last checkpoint is saved.
  logging:
    tensorboard_writer:
      _target_: sam3.train.utils.logger.make_tensorboard_logger
      log_dir: ${launcher.experiment_log_dir}/tensorboard
      flush_secs: 120
      should_log: True
    wandb_writer: null
    log_dir: ${launcher.experiment_log_dir}/logs/
    log_freq: 10

# ============================================================================
# Launcher and Submitit Configuration
# ============================================================================
launcher:
  num_nodes: 4
  gpus_per_node: 8
  experiment_log_dir: ${paths.experiment_log_dir}
  multiprocessing_context: forkserver

submitit:
  account: null # Add your SLURM account if use_cluster is true
  partition: null
  qos: null # Add your QoS if use_cluster is true
  timeout_hour: 72
  use_cluster: True
  cpus_per_task: 10
  port_range: [10000, 65000]
  constraint: null
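#
# Roughly how these fields map onto a SLURM submission through submitit (a commented-out
# sketch; the repo's launcher handles this itself, and the folder, partition/account/qos
# strings and the submitted function are placeholders):
#
#   import submitit
#
#   def run_eval():  # placeholder for the actual eval entry point
#       print("eval job body goes here")
#
#   executor = submitit.AutoExecutor(folder="/my/logs/submitit")
#   executor.update_parameters(
#       nodes=4,
#       gpus_per_node=8,
#       cpus_per_task=10,
#       timeout_min=72 * 60,
#       slurm_partition="my_partition",
#       slurm_account="my_account",
#       slurm_qos="my_qos",
#   )
#   job = executor.submit(run_eval)
#   print(job.job_id)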