|
| 1 | +# @package _global_ |
| 2 | +defaults: |
| 3 | + - _self_ |
| 4 | + |
| 5 | +# This config is the base configuration for all evaluations. Amongst other things, it defines: |
| 6 | +# - the model |
| 7 | +# - the image transforms |
| 8 | +# - the post processors |
| 9 | +# - cluster configuration (only relevant for slurm-based evals, ignored otherwise) |
| 10 | +# |
| 11 | +# Most of the parameters should be kept as-is. The main modifications you may want to make are: |
| 12 | +# - the cluster configuration, to adjust partitions/qos to your system |
| 13 | +# - the flag gather_pred_via_filesys if your RAM is tight |
| 14 | +# - num_val_workers if your number of cores is small (should be roughly number of cores / number of gpus) |
| 15 | +# - the paths below |
| 16 | + |
| 17 | + |
| 18 | +# ============================================================================ |
| 19 | +# Paths Configuration (Change this to your own paths) |
| 20 | +# ============================================================================ |
| 21 | +paths: |
| 22 | + # If you leave the checkpoint path as null, the model will be downloaded from Hugging Face. Otherwise, provide a path. |
| 23 | + checkpoint_path: null |
| 24 | + # the experiments will be subfolders of this |
| 25 | + base_experiment_log_dir: <YOUR EXPERIMENET LOG_DIR> |
| 26 | + |
| 27 | + # base path to the annotation folder for gold (refer to the readmes on how to download) |
| 28 | + base_annotation_path: <YOUR_GOLD_GT_DIR> |
| 29 | + |
| 30 | + # base path to the annotation folder for silver (refer to the readmes on how to download) |
| 31 | + base_annotation_path_silver: <YOUR_SILVER_GT_DIR> |
| 32 | + |
| 33 | + # path to the metaclip images, used for SA-Co gold (refer to the readme for instructions). Can be null if you don't intend on evaluating on this dataset. |
| 34 | + metaclip_img_path: <YOUR_METACLIP_IMG_DIR> |
| 35 | + |
| 36 | + # path to the sa1b images, used for SA-Co gold (refer to the readme for instructions). Can be null if you don't intend on evaluating on this dataset. |
| 37 | + sa1b_img_path: <YOUR_SA1B_IMG_DIR> |
| 38 | + |
| 39 | + # path to the SA-Co/silver images |
| 40 | + silver_img_path: <YOUR_SILVER_IMG_DIR> |
| 41 | + |
| 42 | + bpe_path: <BPE_PATH> # This should be under assets/bpe_simple_vocab_16e6.txt.gz |
| 43 | + |
| 44 | + |
| 45 | +# ============================================================================ |
| 46 | +# Different helper parameters and functions |
| 47 | +# ============================================================================ |
| 48 | +scratch: |
| 49 | + |
| 50 | + use_presence_eval: True |
| 51 | + |
| 52 | + base_val_transform: |
| 53 | + - _target_: sam3.train.transforms.basic_for_api.ComposeAPI |
| 54 | + transforms: |
| 55 | + ######## transforms for validation (begin) ######## |
| 56 | + - _target_: sam3.train.transforms.basic_for_api.RandomResizeAPI |
| 57 | + sizes: ${scratch.resolution} # originally `resolution: 1024` |
| 58 | + max_size: |
| 59 | + _target_: sam3.train.transforms.basic.get_random_resize_max_size |
| 60 | + size: ${scratch.resolution} # originally `resolution: 1024` |
| 61 | + square: true |
| 62 | + consistent_transform: False |
| 63 | + ######## transforms for validation (end) ######## |
| 64 | + - _target_: sam3.train.transforms.basic_for_api.ToTensorAPI |
| 65 | + - _target_: sam3.train.transforms.basic_for_api.NormalizeAPI |
| 66 | + mean: ${scratch.val_norm_mean} |
| 67 | + std: ${scratch.val_norm_std} |
| 68 | + |
| 69 | + loss: null |
| 70 | + |
| 71 | + # Model parameters |
| 72 | + d_model: 256 |
| 73 | + input_box_embedding_dim: ${add:${scratch.d_model},2} |
| 74 | + |
| 75 | + # Box processing |
| 76 | + original_box_postprocessor: |
| 77 | + _target_: sam3.eval.postprocessors.PostProcessImage |
| 78 | + max_dets_per_img: -1 # infinite detections |
| 79 | + use_original_ids: true |
| 80 | + use_original_sizes_box: true |
| 81 | + use_presence: ${scratch.use_presence_eval} |
| 82 | + |
| 83 | + box_postprocessor: |
| 84 | + _target_: sam3.eval.postprocessors.PostProcessImage |
| 85 | + max_dets_per_img: -1 #infinite detections |
| 86 | + use_original_ids: false |
| 87 | + use_original_sizes_box: false |
| 88 | + use_presence: ${scratch.use_presence_eval} |
| 89 | + |
| 90 | + box_postprocessor_thresholded: |
| 91 | + _target_: sam3.eval.postprocessors.PostProcessImage |
| 92 | + max_dets_per_img: -1 #infinite detections |
| 93 | + use_original_ids: false |
| 94 | + use_original_sizes_box: false |
| 95 | + detection_threshold: 0.3 |
| 96 | + use_presence: ${scratch.use_presence_eval} |
| 97 | + |
| 98 | + mask_postprocessor_thresholded: |
| 99 | + _target_: sam3.eval.postprocessors.PostProcessImage |
| 100 | + max_dets_per_img: -1 #infinite detections |
| 101 | + iou_type: "segm" |
| 102 | + use_original_ids: false |
| 103 | + use_original_sizes_box: false |
| 104 | + use_original_sizes_mask: true |
| 105 | + convert_mask_to_rle: True |
| 106 | + detection_threshold: 0.3 |
| 107 | + use_presence: ${scratch.use_presence_eval} |
| 108 | + |
| 109 | + # Image processing parameters |
| 110 | + resolution: 1008 |
| 111 | + max_ann_per_img: 200 |
| 112 | + |
| 113 | + # Normalization parameters |
| 114 | + train_norm_mean: [0.5, 0.5, 0.5] |
| 115 | + train_norm_std: [0.5, 0.5, 0.5] |
| 116 | + val_norm_mean: [0.5, 0.5, 0.5] |
| 117 | + val_norm_std: [0.5, 0.5, 0.5] |
| 118 | + |
| 119 | + # Training parameters |
| 120 | + train_batch_size: 1 |
| 121 | + val_batch_size: 1 |
| 122 | + num_train_workers: 0 |
| 123 | + num_val_workers: 10 # change this depending on the number of cpu cores available |
| 124 | + max_data_epochs: 20 |
| 125 | + target_epoch_size: 1500 |
| 126 | + hybrid_repeats: 1 |
| 127 | + context_length: 2 |
| 128 | + |
| 129 | + # All reduce - this controls how the predictions are sent back to node 0. |
| 130 | + # If you have a lot of RAM, CPU gather is faster. Otherwise, we provide a fallback through the filesystem (e.g., NFS). |
| 131 | + # Switch to true if you get CPU OOMs during gather. |
| 132 | + gather_pred_via_filesys: false |
| 133 | + |
| 134 | + # Learning rate and scheduler parameters (unused for eval) |
| 135 | + lr_scale: 0.1 |
| 136 | + lr_transformer: ${times:8e-4,${scratch.lr_scale}} |
| 137 | + lr_vision_backbone: ${times:2.5e-4,${scratch.lr_scale}} |
| 138 | + lr_language_backbone: ${times:5e-5,${scratch.lr_scale}} |
| 139 | + lrd_vision_backbone: 0.9 # (lower for in-domain and higher for ood) |
| 140 | + wd: 0.1 |
| 141 | + scheduler_timescale: 20 |
| 142 | + scheduler_warmup: 20 |
| 143 | + scheduler_cooldown: 20 |
| 144 | + |
| 145 | + |
| 146 | +# ============================================================================ |
| 147 | +# Trainer Configuration |
| 148 | +# ============================================================================ |
| 149 | + |
| 150 | +trainer: |
| 151 | + _target_: sam3.train.trainer.Trainer |
| 152 | + skip_saving_ckpts: true |
| 153 | + empty_gpu_mem_cache_after_eval: True |
| 154 | + skip_first_val: True |
| 155 | + max_epochs: ${scratch.max_data_epochs} |
| 156 | + accelerator: cuda |
| 157 | + seed_value: 123 |
| 158 | + val_epoch_freq: 10 |
| 159 | + mode: val |
| 160 | + |
| 161 | + distributed: |
| 162 | + backend: nccl |
| 163 | + find_unused_parameters: True |
| 164 | + gradient_as_bucket_view: True |
| 165 | + |
| 166 | + loss: |
| 167 | + all: |
| 168 | + _target_: sam3.train.loss.sam3_loss.DummyLoss |
| 169 | + default: |
| 170 | + _target_: sam3.train.loss.sam3_loss.DummyLoss |
| 171 | + |
| 172 | + data: |
| 173 | + train: null |
| 174 | + val: null |
| 175 | + |
| 176 | + model: |
| 177 | + _target_: sam3.model_builder.build_sam3_image_model |
| 178 | + bpe_path: ${paths.bpe_path} |
| 179 | + device: cpus |
| 180 | + eval_mode: true |
| 181 | + enable_segmentation: true # Warning: Enable this if using segmentation. |
| 182 | + checkpoint_path: ${paths.checkpoint_path} |
| 183 | + |
| 184 | + meters: |
| 185 | + val: null |
| 186 | + |
| 187 | + optim: |
| 188 | + amp: |
| 189 | + enabled: True |
| 190 | + amp_dtype: bfloat16 |
| 191 | + |
| 192 | + optimizer: |
| 193 | + _target_: torch.optim.AdamW |
| 194 | + |
| 195 | + gradient_clip: |
| 196 | + _target_: sam3.train.optim.optimizer.GradientClipper |
| 197 | + max_norm: 0.1 |
| 198 | + norm_type: 2 |
| 199 | + |
| 200 | + param_group_modifiers: |
| 201 | + - _target_: sam3.train.optim.optimizer.layer_decay_param_modifier |
| 202 | + _partial_: True |
| 203 | + layer_decay_value: ${scratch.lrd_vision_backbone} |
| 204 | + apply_to: 'backbone.vision_backbone.trunk' |
| 205 | + overrides: |
| 206 | + - pattern: '*pos_embed*' |
| 207 | + value: 1.0 |
| 208 | + |
| 209 | + options: |
| 210 | + lr: |
| 211 | + - scheduler: # transformer and class_embed |
| 212 | + _target_: sam3.train.optim.schedulers.InverseSquareRootParamScheduler |
| 213 | + base_lr: ${scratch.lr_transformer} |
| 214 | + timescale: ${scratch.scheduler_timescale} |
| 215 | + warmup_steps: ${scratch.scheduler_warmup} |
| 216 | + cooldown_steps: ${scratch.scheduler_cooldown} |
| 217 | + - scheduler: |
| 218 | + _target_: sam3.train.optim.schedulers.InverseSquareRootParamScheduler |
| 219 | + base_lr: ${scratch.lr_vision_backbone} |
| 220 | + timescale: ${scratch.scheduler_timescale} |
| 221 | + warmup_steps: ${scratch.scheduler_warmup} |
| 222 | + cooldown_steps: ${scratch.scheduler_cooldown} |
| 223 | + param_names: |
| 224 | + - 'backbone.vision_backbone.*' |
| 225 | + - scheduler: |
| 226 | + _target_: sam3.train.optim.schedulers.InverseSquareRootParamScheduler |
| 227 | + base_lr: ${scratch.lr_language_backbone} |
| 228 | + timescale: ${scratch.scheduler_timescale} |
| 229 | + warmup_steps: ${scratch.scheduler_warmup} |
| 230 | + cooldown_steps: ${scratch.scheduler_cooldown} |
| 231 | + param_names: |
| 232 | + - 'backbone.language_backbone.*' |
| 233 | + |
| 234 | + weight_decay: |
| 235 | + - scheduler: |
| 236 | + _target_: fvcore.common.param_scheduler.ConstantParamScheduler |
| 237 | + value: ${scratch.wd} |
| 238 | + - scheduler: |
| 239 | + _target_: fvcore.common.param_scheduler.ConstantParamScheduler |
| 240 | + value: 0.0 |
| 241 | + param_names: |
| 242 | + - '*bias*' |
| 243 | + module_cls_names: ['torch.nn.LayerNorm'] |
| 244 | + |
| 245 | + checkpoint: |
| 246 | + save_dir: ${launcher.experiment_log_dir}/checkpoints |
| 247 | + save_freq: 0 # 0: only the last checkpoint is saved. |
| 248 | + |
| 249 | + |
| 250 | + logging: |
| 251 | + tensorboard_writer: |
| 252 | + _target_: sam3.train.utils.logger.make_tensorboard_logger |
| 253 | + log_dir: ${launcher.experiment_log_dir}/tensorboard |
| 254 | + flush_secs: 120 |
| 255 | + should_log: True |
| 256 | + wandb_writer: null |
| 257 | + log_dir: ${launcher.experiment_log_dir}/logs/ |
| 258 | + log_freq: 10 |
| 259 | + |
| 260 | +# ============================================================================ |
| 261 | +# Launcher and Submitit Configuration |
| 262 | +# ============================================================================ |
| 263 | + |
| 264 | +launcher: |
| 265 | + num_nodes: 4 |
| 266 | + gpus_per_node: 8 |
| 267 | + experiment_log_dir: ${paths.experiment_log_dir} |
| 268 | + multiprocessing_context: forkserver |
| 269 | + |
| 270 | + |
| 271 | +submitit: |
| 272 | + account: null # Add your SLURM account if use_cluster == 1 |
| 273 | + partition: null |
| 274 | + qos: null # Add your QoS if use_cluster == 1 |
| 275 | + timeout_hour: 72 |
| 276 | + use_cluster: True |
| 277 | + cpus_per_task: 10 |
| 278 | + port_range: [10000, 65000] |
| 279 | + constraint: null |
0 commit comments