| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591 |
- # @package _global_
# Hydra defaults list: only this file's own content is composed.
defaults:
  - _self_
- # ============================================================================
- # Paths Configuration (Change this to your own paths)
- # ============================================================================
- # python sam3/train/train.py -c configs/odinw_text_only.yaml --use-cluster 1 --partition ${PARTITION} --account ${ACCOUNT} --qos ${QoS}
# User-specific locations. Replace every <...> placeholder before launching.
paths:
  # Root of the ODinW data; the img_folder / json entries in this file are
  # joined onto it via ${paths.odinw_data_root}/...
  odinw_data_root: <YOUR_DATA_DIR>
  # NOTE(review): launcher.experiment_log_dir is currently null instead of
  # interpolating this value -- confirm this key is actually consumed.
  experiment_log_dir: <YOUR_EXPERIMENT_LOG_DIR>
  # BPE vocabulary file; this should be under
  # sam3/assets/bpe_simple_vocab_16e6.txt.gz
  bpe_path: <BPE_PATH>
# Per-dataset training recipe: which annotation file to use, the train/val
# transform pipelines, and the (box-only, no mask) loss.
odinw_train:
  # Base name (without .json) of the training annotation file; it is
  # substituted into each train.json path under all_odinw_supercategories.
  train_file: fewshot_train_shot10_seed300
  num_images: null
  # Picks one entry of all_odinw_supercategories using the job-array task index.
  supercategory_tuple: ${all_odinw_supercategories.${string:${submitit.job_array.task_index}}}

  # ---- Training transforms ----
  train_transforms:
    - _target_: sam3.train.transforms.basic_for_api.ComposeAPI
      transforms:
        - _target_: sam3.train.transforms.filter_query_transforms.FlexibleFilterFindGetQueries
          query_filter:
            _target_: sam3.train.transforms.filter_query_transforms.FilterCrowds
        - _target_: sam3.train.transforms.point_sampling.RandomizeInputBbox
          box_noise_std: 0.1
          box_noise_max: 20
        - _target_: sam3.train.transforms.segmentation.DecodeRle
        - _target_: sam3.train.transforms.basic_for_api.RandomResizeAPI
          sizes:
            _target_: sam3.train.transforms.basic.get_random_resize_scales
            size: ${scratch.resolution}
            min_size: 480
            rounded: false
          max_size:
            _target_: sam3.train.transforms.basic.get_random_resize_max_size
            size: ${scratch.resolution}
          square: true
          consistent_transform: ${scratch.consistent_transform}
        - _target_: sam3.train.transforms.basic_for_api.PadToSizeAPI
          size: ${scratch.resolution}
          consistent_transform: ${scratch.consistent_transform}
        - _target_: sam3.train.transforms.basic_for_api.ToTensorAPI
        # The empty-target filter is listed both before and after normalization.
        - _target_: sam3.train.transforms.filter_query_transforms.FlexibleFilterFindGetQueries
          query_filter:
            _target_: sam3.train.transforms.filter_query_transforms.FilterEmptyTargets
        - _target_: sam3.train.transforms.basic_for_api.NormalizeAPI
          mean: ${scratch.train_norm_mean}
          std: ${scratch.train_norm_std}
        - _target_: sam3.train.transforms.filter_query_transforms.FlexibleFilterFindGetQueries
          query_filter:
            _target_: sam3.train.transforms.filter_query_transforms.FilterEmptyTargets
        - _target_: sam3.train.transforms.filter_query_transforms.FlexibleFilterFindGetQueries
          query_filter:
            _target_: sam3.train.transforms.filter_query_transforms.FilterFindQueriesWithTooManyOut
            max_num_objects: ${scratch.max_ann_per_img}

  # ---- Validation transforms ----
  val_transforms:
    - _target_: sam3.train.transforms.basic_for_api.ComposeAPI
      transforms:
        - _target_: sam3.train.transforms.basic_for_api.RandomResizeAPI
          sizes: ${scratch.resolution}
          max_size:
            _target_: sam3.train.transforms.basic.get_random_resize_max_size
            size: ${scratch.resolution}
          square: true
          consistent_transform: false
        - _target_: sam3.train.transforms.basic_for_api.ToTensorAPI
        - _target_: sam3.train.transforms.basic_for_api.NormalizeAPI
          mean: ${scratch.val_norm_mean}
          std: ${scratch.val_norm_std}

  # ---- Loss (no mask loss: loss_fn_semantic_seg is null) ----
  loss:
    _target_: sam3.train.loss.sam3_loss.Sam3LossWrapper
    matcher: ${scratch.matcher}
    o2m_weight: 2.0
    o2m_matcher:
      _target_: sam3.train.matcher.BinaryOneToManyMatcher
      alpha: 0.3
      threshold: 0.4
      topk: 4
    use_o2m_matcher_on_o2m_aux: ${scratch.use_o2m_matcher_on_o2m_aux}
    loss_fns_find:
      - _target_: sam3.train.loss.loss_fns.Boxes
        weight_dict:
          loss_bbox: 5.0
          loss_giou: 2.0
      - _target_: sam3.train.loss.loss_fns.IABCEMdetr
        weak_loss: false
        weight_dict:
          loss_ce: ${scratch.loss_ce_weight}  # Change
          presence_loss: ${scratch.presence_weight}  # Change
        pos_weight: ${scratch.iabce_pos_weight}
        alpha: ${scratch.iabce_alpha}
        gamma: 2
        use_presence: true  # Change
        pos_focal: ${scratch.iabce_pos_focal}
        pad_n_queries: ${scratch.num_queries}
        pad_scale_pos: ${scratch.instance_query_loss_pad_scale_pos}
    loss_fn_semantic_seg: null
    scale_by_find_batch_size: ${scratch.scale_by_find_batch_size}
- # ============================================================================
- # Different helper parameters and functions
- # ============================================================================
# Shared scalars and sub-configs that the rest of the file pulls in through
# ${scratch.*} interpolations.
scratch:
  enable_segmentation: false
  use_act_checkpoint_geo_encoder: true

  # Geometry (point / box prompt) encoder definition.
  input_geometry_encoder:
    _target_: sam3.model.geometry_encoders.SequenceGeometryEncoder
    pos_enc: ${scratch.pos_embed}
    encode_boxes_as_points: false
    points_direct_project: true
    points_pool: true
    points_pos_enc: true
    boxes_direct_project: true
    boxes_pool: true
    boxes_pos_enc: true
    d_model: ${scratch.d_model}
    num_layers: 3
    use_act_ckpt: ${scratch.use_act_checkpoint_geo_encoder}
    layer:
      _target_: sam3.model.encoder.TransformerEncoderLayer
      activation: "relu"
      d_model: ${scratch.d_model}
      dim_feedforward: 2048
      dropout: ${scratch.encoder_dropout}
      pos_enc_at_attn: false
      pre_norm: true
      pos_enc_at_cross_attn_queries: false
      pos_enc_at_cross_attn_keys: true
      self_attention:
        _target_: sam3.model.attention.MultiheadAttention
        attn_type: Vanilla
        num_heads: 8
        dropout: ${scratch.encoder_dropout}
        embed_dim: ${scratch.d_model}
        batch_first: false
      cross_attention:
        _target_: sam3.model.attention.MultiheadAttention
        attn_type: Vanilla
        num_heads: 8
        dropout: ${scratch.encoder_dropout}
        embed_dim: ${scratch.d_model}
        batch_first: false
    add_cls: true
    add_post_encode_proj: true

  boxRPB: "log"
  dac: true
  use_early_fusion: true
  o2m_mask: false
  num_feature_levels: 1  # > 1 not implemented
  encoder_dropout: 0.1
  decoder_dropout: 0.1

  tokenizer_ve:
    _target_: sam3.model.tokenizer_ve.SimpleTokenizer
    bpe_path: ${paths.bpe_path}

  freeze_text_tower: false
  freeze_image_tower: NoFreeze
  vis_backbone_dp: 0.0

  # Activation checkpointing (saves memory)
  use_act_checkpoint_vision_backbone: true
  use_act_checkpoint_text_backbone: true
  use_act_checkpoint_encoder: true
  use_act_checkpoint_decoder: true

  loss: null

  # Loss parameters
  num_queries: 200
  presence_weight: 20.0
  loss_ce_weight: 20.0
  iabce_pos_weight: 5.0
  iabce_pos_focal: false
  iabce_alpha: 0.25
  instance_query_loss_pad_scale_pos: 1.0
  use_o2m_matcher_on_o2m_aux: false

  # Model parameters
  use_instance_query: true
  d_model: 256
  pos_embed:
    _target_: sam3.model.position_encoding.PositionEmbeddingSine
    num_pos_feats: ${scratch.d_model}
    normalize: true
    scale: null
    temperature: 10000

  # Box post-processing used by the validation meter
  use_presence_eval: true
  original_box_postprocessor:
    _target_: sam3.eval.postprocessors.PostProcessImage
    max_dets_per_img: -1  # infinite detections
    use_original_ids: true
    use_original_sizes_box: true
    use_presence: ${scratch.use_presence_eval}

  # Matcher configuration
  matcher:
    _target_: sam3.train.matcher.BinaryHungarianMatcherV2
    focal: true
    cost_class: 2.0
    cost_bbox: 5.0
    cost_giou: 2.0
    alpha: 0.25
    gamma: 2
    stable: false
  scale_by_find_batch_size: true

  # Image processing parameters
  resolution: 1008
  consistent_transform: false
  max_ann_per_img: 200

  # Normalization parameters
  train_norm_mean: [0.5, 0.5, 0.5]
  train_norm_std: [0.5, 0.5, 0.5]
  val_norm_mean: [0.5, 0.5, 0.5]
  val_norm_std: [0.5, 0.5, 0.5]

  # Training parameters
  train_batch_size: 1
  val_batch_size: 1
  num_train_workers: 0
  num_val_workers: 0
  max_data_epochs: 40
  target_epoch_size: 1500
  hybrid_repeats: 1
  context_length: 2
  gather_pred_via_filesys: false

  # Learning rates: each base value is multiplied by lr_scale through the
  # `times` resolver; scheduler settings are shared by all LR groups.
  lr_scale: 0.1
  lr_transformer: ${times:8e-4,${scratch.lr_scale}}
  lr_vision_backbone: ${times:2.5e-4,${scratch.lr_scale}}
  lr_language_backbone: ${times:5e-5,${scratch.lr_scale}}
  lrd_vision_backbone: 0.9
  wd: 0.1
  scheduler_timescale: 20
  scheduler_warmup: 20
  scheduler_cooldown: 20
- # ============================================================================
- # Trainer Configuration
- # ============================================================================
# Trainer definition: run control, distributed settings, loss wiring,
# train/val data, model builder, validation meters, optimizer, checkpointing
# and logging.
trainer:
  _target_: sam3.train.trainer.Trainer
  skip_saving_ckpts: true
  empty_gpu_mem_cache_after_eval: true
  skip_first_val: true
  max_epochs: ${scratch.max_data_epochs}
  accelerator: cuda
  seed_value: 123
  val_epoch_freq: 10
  mode: train

  distributed:
    backend: nccl
    find_unused_parameters: true
    gradient_as_bucket_view: true

  # The "all" key matches dict_key of the training collate_fn below.
  loss:
    all: ${odinw_train.loss}
    default:
      _target_: sam3.train.loss.sam3_loss.DummyLoss

  data:
    train:
      _target_: sam3.train.data.torch_dataset.TorchDataset
      dataset:
        _target_: sam3.train.data.sam3_image_dataset.Sam3ImageDataset
        limit_ids: ${odinw_train.num_images}
        transforms: ${odinw_train.train_transforms}
        load_segmentation: ${scratch.enable_segmentation}
        max_ann_per_img: 500000
        multiplier: 1
        max_train_queries: 50000
        max_val_queries: 50000
        training: true
        use_caching: false
        img_folder: ${paths.odinw_data_root}/${odinw_train.supercategory_tuple.train.img_folder}
        ann_file:
          _target_: sam3.eval.coco_reindex.reindex_coco_to_temp
          input_json_path: ${paths.odinw_data_root}/${odinw_train.supercategory_tuple.train.json}
        coco_json_loader:
          _target_: sam3.train.data.coco_json_loaders.COCO_FROM_JSON
          # Prompt override looked up by dataset name in odinw35_prompts.
          prompts: ${odinw35_prompts.${odinw_train.supercategory_tuple.name}}
          _partial_: true
      shuffle: true
      batch_size: ${scratch.train_batch_size}
      num_workers: ${scratch.num_train_workers}
      pin_memory: false
      drop_last: true
      collate_fn:
        _target_: sam3.train.data.collator.collate_fn_api
        _partial_: true
        repeats: ${scratch.hybrid_repeats}
        dict_key: all
        with_seg_masks: ${scratch.enable_segmentation}

    val:
      _target_: sam3.train.data.torch_dataset.TorchDataset
      dataset:
        _target_: sam3.train.data.sam3_image_dataset.Sam3ImageDataset
        load_segmentation: ${scratch.enable_segmentation}
        coco_json_loader:
          _target_: sam3.train.data.coco_json_loaders.COCO_FROM_JSON
          prompts: ${odinw35_prompts.${odinw_train.supercategory_tuple.name}}
          include_negatives: true
          # Note: Since we are doing AP +ve we need to include all categories!
          category_chunk_size: 20
          _partial_: true
        img_folder: ${paths.odinw_data_root}/${odinw_train.supercategory_tuple.val.img_folder}
        ann_file:
          _target_: sam3.eval.coco_reindex.reindex_coco_to_temp
          input_json_path: ${paths.odinw_data_root}/${odinw_train.supercategory_tuple.val.json}
        transforms: ${odinw_train.val_transforms}
        max_ann_per_img: 100000
        multiplier: 1
        training: false
      shuffle: false
      batch_size: ${scratch.val_batch_size}
      num_workers: ${scratch.num_val_workers}
      pin_memory: false
      drop_last: false
      collate_fn:
        _target_: sam3.train.data.collator.collate_fn_api
        _partial_: true
        repeats: 1
        # Matches the meters.val key below.
        dict_key: odinw35
        with_seg_masks: ${scratch.enable_segmentation}

  model:
    _target_: sam3.model_builder.build_sam3_image_model
    bpe_path: ${paths.bpe_path}
    # NOTE(review): "cpus" is not a standard torch device string
    # ("cpu" / "cuda") -- confirm how build_sam3_image_model interprets it.
    device: cpus
    eval_mode: false  # Set to false if training
    # Warning: Enable this if using segmentation.
    enable_segmentation: ${scratch.enable_segmentation}

  meters:
    val:
      odinw35:
        detection:
          _target_: sam3.eval.coco_writer.PredictionDumper
          iou_type: "bbox"
          dump_dir: ${launcher.experiment_log_dir}/dumps/odinw/${odinw_train.supercategory_tuple.name}
          merge_predictions: true
          postprocessor: ${scratch.original_box_postprocessor}
          gather_pred_via_filesys: ${scratch.gather_pred_via_filesys}
          maxdets: 100
          pred_file_evaluators:
            - _target_: sam3.eval.coco_eval_offline.CocoEvaluatorOfflineWithPredFileEvaluators
              gt_path:
                _target_: sam3.eval.coco_reindex.reindex_coco_to_temp
                input_json_path: ${paths.odinw_data_root}/${odinw_train.supercategory_tuple.val.json}
              tide: false
              iou_type: "bbox"
              positive_split: false

  optim:
    amp:
      enabled: true
      amp_dtype: bfloat16

    optimizer:
      _target_: torch.optim.AdamW

    gradient_clip:
      _target_: sam3.train.optim.optimizer.GradientClipper
      max_norm: 0.1
      norm_type: 2

    param_group_modifiers:
      - _target_: sam3.train.optim.optimizer.layer_decay_param_modifier
        _partial_: true
        layer_decay_value: ${scratch.lrd_vision_backbone}
        apply_to: 'backbone.vision_backbone.trunk'
        overrides:
          - pattern: '*pos_embed*'
            value: 1.0

    options:
      lr:
        # Entry without param_names (transformer and class_embed).
        - scheduler:
            _target_: sam3.train.optim.schedulers.InverseSquareRootParamScheduler
            base_lr: ${scratch.lr_transformer}
            timescale: ${scratch.scheduler_timescale}
            warmup_steps: ${scratch.scheduler_warmup}
            cooldown_steps: ${scratch.scheduler_cooldown}
        - scheduler:
            _target_: sam3.train.optim.schedulers.InverseSquareRootParamScheduler
            base_lr: ${scratch.lr_vision_backbone}
            timescale: ${scratch.scheduler_timescale}
            warmup_steps: ${scratch.scheduler_warmup}
            cooldown_steps: ${scratch.scheduler_cooldown}
          param_names:
            - 'backbone.vision_backbone.*'
        - scheduler:
            _target_: sam3.train.optim.schedulers.InverseSquareRootParamScheduler
            base_lr: ${scratch.lr_language_backbone}
            timescale: ${scratch.scheduler_timescale}
            warmup_steps: ${scratch.scheduler_warmup}
            cooldown_steps: ${scratch.scheduler_cooldown}
          param_names:
            - 'backbone.language_backbone.*'
      weight_decay:
        - scheduler:
            _target_: fvcore.common.param_scheduler.ConstantParamScheduler
            value: ${scratch.wd}
        # Zero weight decay for bias parameters and LayerNorm modules.
        - scheduler:
            _target_: fvcore.common.param_scheduler.ConstantParamScheduler
            value: 0.0
          param_names:
            - '*bias*'
          module_cls_names: ['torch.nn.LayerNorm']

  checkpoint:
    save_dir: ${launcher.experiment_log_dir}/checkpoints
    save_freq: 0  # 0 only last checkpoint is saved.

  logging:
    tensorboard_writer:
      _target_: sam3.train.utils.logger.make_tensorboard_logger
      log_dir: ${launcher.experiment_log_dir}/tensorboard
      flush_secs: 120
      should_log: true
    wandb_writer: null
    log_dir: ${launcher.experiment_log_dir}/logs/${odinw_train.supercategory_tuple.name}
    log_freq: 10
- # ============================================================================
- # Launcher and Submitit Configuration
- # ============================================================================
# Process layout for a run and the base directory for its outputs.
launcher:
  num_nodes: 1
  gpus_per_node: 2
  # NOTE(review): left null instead of ${paths.experiment_log_dir}; the
  # checkpoint, tensorboard, log and dump paths all interpolate this value, so
  # confirm the launch script fills it in before those are resolved.
  experiment_log_dir: null
  multiprocessing_context: forkserver
# Cluster submission settings. account / partition / qos are null here; the
# example command in the file header passes them on the command line.
submitit:
  account: null
  partition: null
  qos: null
  timeout_hour: 72
  use_cluster: true
  cpus_per_task: 10
  port_range: [10000, 65000]
  constraint: null
  # Job array: task_index selects the entry of all_odinw_supercategories that
  # odinw_train.supercategory_tuple resolves to; num_tasks equals the number of
  # entries in that list (13).
  job_array:
    num_tasks: 13
    task_index: 0
- # ============================================================================
- # ODinW13 Supercategories
- # ============================================================================
# One entry per ODinW13 dataset. All paths are relative to
# ${paths.odinw_data_root}; train.json is built from ${odinw_train.train_file}.
all_odinw_supercategories:
  - name: AerialMaritimeDrone_large
    val:
      img_folder: AerialMaritimeDrone/large/test/
      json: AerialMaritimeDrone/large/test/annotations_without_background.json
    train:
      img_folder: AerialMaritimeDrone/large/train/
      json: AerialMaritimeDrone/large/train/${odinw_train.train_file}.json

  - name: Aquarium
    val:
      img_folder: Aquarium/Aquarium Combined.v2-raw-1024.coco/test/
      json: Aquarium/Aquarium Combined.v2-raw-1024.coco/test/annotations_without_background.json
    train:
      img_folder: Aquarium/Aquarium Combined.v2-raw-1024.coco/train/
      json: Aquarium/Aquarium Combined.v2-raw-1024.coco/train/${odinw_train.train_file}.json

  - name: CottontailRabbits
    val:
      img_folder: CottontailRabbits/test/
      json: CottontailRabbits/test/annotations_without_background.json
    train:
      img_folder: CottontailRabbits/train/
      json: CottontailRabbits/train/${odinw_train.train_file}.json

  - name: EgoHands_generic
    val:
      img_folder: EgoHands/generic/test/
      json: EgoHands/generic/test/annotations_without_background.json
    train:
      img_folder: EgoHands/generic/train/
      json: EgoHands/generic/train/${odinw_train.train_file}.json

  - name: NorthAmericaMushrooms
    val:
      img_folder: NorthAmericaMushrooms/North American Mushrooms.v1-416x416.coco/test/
      json: NorthAmericaMushrooms/North American Mushrooms.v1-416x416.coco/test/annotations_without_background.json
    train:
      img_folder: NorthAmericaMushrooms/North American Mushrooms.v1-416x416.coco/train/
      json: NorthAmericaMushrooms/North American Mushrooms.v1-416x416.coco/train/${odinw_train.train_file}.json

  - name: Packages
    val:
      img_folder: Packages/Raw/test/
      json: Packages/Raw/test/annotations_without_background.json
    train:
      img_folder: Packages/Raw/train/
      json: Packages/Raw/train/${odinw_train.train_file}.json

  - name: PascalVOC
    val:
      img_folder: PascalVOC/valid/
      json: PascalVOC/valid/annotations_without_background.json
    train:
      img_folder: PascalVOC/train/
      json: PascalVOC/train/${odinw_train.train_file}.json

  - name: Raccoon
    val:
      img_folder: Raccoon/Raccoon.v2-raw.coco/test/
      json: Raccoon/Raccoon.v2-raw.coco/test/annotations_without_background.json
    train:
      img_folder: Raccoon/Raccoon.v2-raw.coco/train/
      json: Raccoon/Raccoon.v2-raw.coco/train/${odinw_train.train_file}.json

  - name: ShellfishOpenImages
    val:
      img_folder: ShellfishOpenImages/raw/test/
      json: ShellfishOpenImages/raw/test/annotations_without_background.json
    train:
      img_folder: ShellfishOpenImages/raw/train/
      json: ShellfishOpenImages/raw/train/${odinw_train.train_file}.json

  - name: VehiclesOpenImages
    val:
      img_folder: VehiclesOpenImages/416x416/test/
      json: VehiclesOpenImages/416x416/test/annotations_without_background.json
    train:
      img_folder: VehiclesOpenImages/416x416/train/
      json: VehiclesOpenImages/416x416/train/${odinw_train.train_file}.json

  # pistols keeps train and val images in the same export/ folder.
  - name: pistols
    val:
      img_folder: pistols/export/
      json: pistols/export/test_annotations_without_background.json
    train:
      img_folder: pistols/export/
      json: pistols/export/${odinw_train.train_file}.json

  - name: pothole
    val:
      img_folder: pothole/test/
      json: pothole/test/annotations_without_background.json
    train:
      img_folder: pothole/train/
      json: pothole/train/${odinw_train.train_file}.json

  - name: thermalDogsAndPeople
    val:
      img_folder: thermalDogsAndPeople/test/
      json: thermalDogsAndPeople/test/annotations_without_background.json
    train:
      img_folder: thermalDogsAndPeople/train/
      json: thermalDogsAndPeople/train/${odinw_train.train_file}.json
# Optional per-dataset category-prompt overrides, keyed by the `name` of each
# all_odinw_supercategories entry and passed to the COCO json loader as
# `prompts`. null means no override is supplied for that dataset.
# Non-null values are strings holding a list of
# {"id", "name", "supercategory"} records. Both use JSON-style double quotes
# so the text is valid JSON and also a valid Python literal.
# NOTE(review): confirm how COCO_FROM_JSON parses this string.
odinw35_prompts:
  AerialMaritimeDrone_large: '[{"id": 1, "name": "boat", "supercategory": "movable-objects"},
    {"id": 2, "name": "car", "supercategory": "movable-objects"}, {"id": 3, "name": "dock",
    "supercategory": "movable-objects"}, {"id": 4, "name": "jet ski", "supercategory": "movable-objects"},
    {"id": 5, "name": "boat lift", "supercategory": "movable-objects"}]'
  Aquarium: null
  CottontailRabbits: null
  EgoHands_generic: null
  NorthAmericaMushrooms: '[{"id": 1, "name": "chicken of the woods", "supercategory": "mushroom"},
    {"id": 2, "name": "chanterelle", "supercategory": "mushroom"}]'
  Packages: null
  PascalVOC: null
  Raccoon: null
  ShellfishOpenImages: null
  VehiclesOpenImages: null
  pistols: null
  pothole: null
  thermalDogsAndPeople: null
|