Skip to content

Commit 9db47df

Browse files
authored
Add files via upload
1 parent a3dfbb3 commit 9db47df

File tree

68 files changed

+17733
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

68 files changed

+17733
-0
lines changed

train/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved

train/configs/eval_base.yaml

Lines changed: 279 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,279 @@
1+
# @package _global_
2+
defaults:
3+
- _self_
4+
5+
# This config is the base configuration for all evaluations. Amongst other things, it defines:
6+
# - the model
7+
# - the image transforms
8+
# - the post processors
9+
# - cluster configuration (only relevant for slurm-based evals, ignored otherwise)
10+
#
11+
# Most of the parameters should be kept as-is. The main modifications you may want to make are:
12+
# - the cluster configuration, to adjust partitions/qos to your system
13+
# - the flag gather_pred_via_filesys if your RAM is tight
14+
# - num_val_workers if your number of cores is small (should be roughly number of cores / number of gpus)
15+
# - the paths below
16+
17+
18+
# ============================================================================
19+
# Paths Configuration (Change this to your own paths)
20+
# ============================================================================
21+
paths:
22+
# If you leave the checkpoint path as null, the model will be downloaded from Hugging Face. Otherwise, provide a path.
23+
checkpoint_path: null
24+
# the experiments will be subfolders of this
25+
base_experiment_log_dir: <YOUR_EXPERIMENT_LOG_DIR>
26+
27+
# base path to the annotation folder for gold (refer to the readmes on how to download)
28+
base_annotation_path: <YOUR_GOLD_GT_DIR>
29+
30+
# base path to the annotation folder for silver (refer to the readmes on how to download)
31+
base_annotation_path_silver: <YOUR_SILVER_GT_DIR>
32+
33+
# path to the metaclip images, used for SA-Co gold (refer to the readme for instructions). Can be null if you don't intend on evaluating on this dataset.
34+
metaclip_img_path: <YOUR_METACLIP_IMG_DIR>
35+
36+
# path to the sa1b images, used for SA-Co gold (refer to the readme for instructions). Can be null if you don't intend on evaluating on this dataset.
37+
sa1b_img_path: <YOUR_SA1B_IMG_DIR>
38+
39+
# path to the SA-Co/silver images
40+
silver_img_path: <YOUR_SILVER_IMG_DIR>
41+
42+
bpe_path: <BPE_PATH> # This should be under assets/bpe_simple_vocab_16e6.txt.gz
43+
44+
45+
# ============================================================================
46+
# Different helper parameters and functions
47+
# ============================================================================
48+
scratch:
49+
50+
use_presence_eval: True
51+
52+
base_val_transform:
53+
- _target_: sam3.train.transforms.basic_for_api.ComposeAPI
54+
transforms:
55+
######## transforms for validation (begin) ########
56+
- _target_: sam3.train.transforms.basic_for_api.RandomResizeAPI
57+
sizes: ${scratch.resolution} # originally `resolution: 1024`
58+
max_size:
59+
_target_: sam3.train.transforms.basic.get_random_resize_max_size
60+
size: ${scratch.resolution} # originally `resolution: 1024`
61+
square: true
62+
consistent_transform: False
63+
######## transforms for validation (end) ########
64+
- _target_: sam3.train.transforms.basic_for_api.ToTensorAPI
65+
- _target_: sam3.train.transforms.basic_for_api.NormalizeAPI
66+
mean: ${scratch.val_norm_mean}
67+
std: ${scratch.val_norm_std}
68+
69+
loss: null
70+
71+
# Model parameters
72+
d_model: 256
73+
input_box_embedding_dim: ${add:${scratch.d_model},2}
74+
75+
# Box processing
76+
original_box_postprocessor:
77+
_target_: sam3.eval.postprocessors.PostProcessImage
78+
max_dets_per_img: -1 # infinite detections
79+
use_original_ids: true
80+
use_original_sizes_box: true
81+
use_presence: ${scratch.use_presence_eval}
82+
83+
box_postprocessor:
84+
_target_: sam3.eval.postprocessors.PostProcessImage
85+
max_dets_per_img: -1 #infinite detections
86+
use_original_ids: false
87+
use_original_sizes_box: false
88+
use_presence: ${scratch.use_presence_eval}
89+
90+
box_postprocessor_thresholded:
91+
_target_: sam3.eval.postprocessors.PostProcessImage
92+
max_dets_per_img: -1 #infinite detections
93+
use_original_ids: false
94+
use_original_sizes_box: false
95+
detection_threshold: 0.3
96+
use_presence: ${scratch.use_presence_eval}
97+
98+
mask_postprocessor_thresholded:
99+
_target_: sam3.eval.postprocessors.PostProcessImage
100+
max_dets_per_img: -1 #infinite detections
101+
iou_type: "segm"
102+
use_original_ids: false
103+
use_original_sizes_box: false
104+
use_original_sizes_mask: true
105+
convert_mask_to_rle: True
106+
detection_threshold: 0.3
107+
use_presence: ${scratch.use_presence_eval}
108+
109+
# Image processing parameters
110+
resolution: 1008
111+
max_ann_per_img: 200
112+
113+
# Normalization parameters
114+
train_norm_mean: [0.5, 0.5, 0.5]
115+
train_norm_std: [0.5, 0.5, 0.5]
116+
val_norm_mean: [0.5, 0.5, 0.5]
117+
val_norm_std: [0.5, 0.5, 0.5]
118+
119+
# Training parameters
120+
train_batch_size: 1
121+
val_batch_size: 1
122+
num_train_workers: 0
123+
num_val_workers: 10 # change this depending on the number of cpu cores available
124+
max_data_epochs: 20
125+
target_epoch_size: 1500
126+
hybrid_repeats: 1
127+
context_length: 2
128+
129+
# All reduce - this controls how the predictions are sent back to node 0.
130+
# If you have a lot of RAM, CPU gather is faster. Otherwise, we provide a fallback through the filesystem (e.g. NFS).
131+
# Switch to true if you get CPU OOMs during gather.
132+
gather_pred_via_filesys: false
133+
134+
# Learning rate and scheduler parameters (unused for eval)
135+
lr_scale: 0.1
136+
lr_transformer: ${times:8e-4,${scratch.lr_scale}}
137+
lr_vision_backbone: ${times:2.5e-4,${scratch.lr_scale}}
138+
lr_language_backbone: ${times:5e-5,${scratch.lr_scale}}
139+
lrd_vision_backbone: 0.9 # (lower for in-domain and higher for OOD)
140+
wd: 0.1
141+
scheduler_timescale: 20
142+
scheduler_warmup: 20
143+
scheduler_cooldown: 20
144+
145+
146+
# ============================================================================
147+
# Trainer Configuration
148+
# ============================================================================
149+
150+
trainer:
151+
_target_: sam3.train.trainer.Trainer
152+
skip_saving_ckpts: true
153+
empty_gpu_mem_cache_after_eval: True
154+
skip_first_val: True
155+
max_epochs: ${scratch.max_data_epochs}
156+
accelerator: cuda
157+
seed_value: 123
158+
val_epoch_freq: 10
159+
mode: val
160+
161+
distributed:
162+
backend: nccl
163+
find_unused_parameters: True
164+
gradient_as_bucket_view: True
165+
166+
loss:
167+
all:
168+
_target_: sam3.train.loss.sam3_loss.DummyLoss
169+
default:
170+
_target_: sam3.train.loss.sam3_loss.DummyLoss
171+
172+
data:
173+
train: null
174+
val: null
175+
176+
model:
177+
_target_: sam3.model_builder.build_sam3_image_model
178+
bpe_path: ${paths.bpe_path}
179+
device: cpus
180+
eval_mode: true
181+
enable_segmentation: true # Warning: Enable this if using segmentation.
182+
checkpoint_path: ${paths.checkpoint_path}
183+
184+
meters:
185+
val: null
186+
187+
optim:
188+
amp:
189+
enabled: True
190+
amp_dtype: bfloat16
191+
192+
optimizer:
193+
_target_: torch.optim.AdamW
194+
195+
gradient_clip:
196+
_target_: sam3.train.optim.optimizer.GradientClipper
197+
max_norm: 0.1
198+
norm_type: 2
199+
200+
param_group_modifiers:
201+
- _target_: sam3.train.optim.optimizer.layer_decay_param_modifier
202+
_partial_: True
203+
layer_decay_value: ${scratch.lrd_vision_backbone}
204+
apply_to: 'backbone.vision_backbone.trunk'
205+
overrides:
206+
- pattern: '*pos_embed*'
207+
value: 1.0
208+
209+
options:
210+
lr:
211+
- scheduler: # transformer and class_embed
212+
_target_: sam3.train.optim.schedulers.InverseSquareRootParamScheduler
213+
base_lr: ${scratch.lr_transformer}
214+
timescale: ${scratch.scheduler_timescale}
215+
warmup_steps: ${scratch.scheduler_warmup}
216+
cooldown_steps: ${scratch.scheduler_cooldown}
217+
- scheduler:
218+
_target_: sam3.train.optim.schedulers.InverseSquareRootParamScheduler
219+
base_lr: ${scratch.lr_vision_backbone}
220+
timescale: ${scratch.scheduler_timescale}
221+
warmup_steps: ${scratch.scheduler_warmup}
222+
cooldown_steps: ${scratch.scheduler_cooldown}
223+
param_names:
224+
- 'backbone.vision_backbone.*'
225+
- scheduler:
226+
_target_: sam3.train.optim.schedulers.InverseSquareRootParamScheduler
227+
base_lr: ${scratch.lr_language_backbone}
228+
timescale: ${scratch.scheduler_timescale}
229+
warmup_steps: ${scratch.scheduler_warmup}
230+
cooldown_steps: ${scratch.scheduler_cooldown}
231+
param_names:
232+
- 'backbone.language_backbone.*'
233+
234+
weight_decay:
235+
- scheduler:
236+
_target_: fvcore.common.param_scheduler.ConstantParamScheduler
237+
value: ${scratch.wd}
238+
- scheduler:
239+
_target_: fvcore.common.param_scheduler.ConstantParamScheduler
240+
value: 0.0
241+
param_names:
242+
- '*bias*'
243+
module_cls_names: ['torch.nn.LayerNorm']
244+
245+
checkpoint:
246+
save_dir: ${launcher.experiment_log_dir}/checkpoints
247+
save_freq: 0 # 0: only the last checkpoint is saved.
248+
249+
250+
logging:
251+
tensorboard_writer:
252+
_target_: sam3.train.utils.logger.make_tensorboard_logger
253+
log_dir: ${launcher.experiment_log_dir}/tensorboard
254+
flush_secs: 120
255+
should_log: True
256+
wandb_writer: null
257+
log_dir: ${launcher.experiment_log_dir}/logs/
258+
log_freq: 10
259+
260+
# ============================================================================
261+
# Launcher and Submitit Configuration
262+
# ============================================================================
263+
264+
launcher:
265+
num_nodes: 4
266+
gpus_per_node: 8
267+
experiment_log_dir: ${paths.experiment_log_dir}
268+
multiprocessing_context: forkserver
269+
270+
271+
submitit:
272+
account: null # Add your SLURM account if use_cluster == 1
273+
partition: null
274+
qos: null # Add your QoS if use_cluster == 1
275+
timeout_hour: 72
276+
use_cluster: True
277+
cpus_per_task: 10
278+
port_range: [10000, 65000]
279+
constraint: null
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
# @package _global_
2+
defaults:
3+
- /configs/eval_base.yaml
4+
- _self_
5+
6+
# ============================================================================
7+
# Paths Configuration (you can override here, but it shouldn't require further changes if eval_base.yaml is correct)
8+
# ============================================================================
9+
paths:
10+
experiment_log_dir: ${paths.base_experiment_log_dir}/gold_attributes/
11+
coco_gt: ${paths.base_annotation_path}/gold_attributes_merged_a_release_test.json
12+
coco_gts:
13+
- ${paths.base_annotation_path}/gold_attributes_merged_a_release_test.json
14+
- ${paths.base_annotation_path}/gold_attributes_merged_b_release_test.json
15+
- ${paths.base_annotation_path}/gold_attributes_merged_c_release_test.json
16+
17+
18+
# ============================================================================
19+
# Trainer Configuration
20+
# ============================================================================
21+
22+
trainer:
23+
data:
24+
val:
25+
_target_: sam3.train.data.torch_dataset.TorchDataset
26+
dataset:
27+
_target_: sam3.train.data.sam3_image_dataset.Sam3ImageDataset
28+
coco_json_loader:
29+
_target_: sam3.train.data.coco_json_loaders.SAM3_EVAL_API_FROM_JSON_NP
30+
_partial_: true
31+
img_folder: ${paths.metaclip_img_path}
32+
ann_file: ${paths.coco_gt}
33+
transforms: ${scratch.base_val_transform}
34+
max_ann_per_img: 100000
35+
multiplier: 1
36+
training: false
37+
38+
shuffle: False
39+
batch_size: ${scratch.val_batch_size}
40+
num_workers: ${scratch.num_val_workers}
41+
pin_memory: False
42+
drop_last: False
43+
collate_fn:
44+
_target_: sam3.train.data.collator.collate_fn_api
45+
_partial_: true
46+
repeats: ${scratch.hybrid_repeats}
47+
dict_key: gold_attributes
48+
49+
meters:
50+
val:
51+
gold_attributes: # this key matches the "dict_key" in the dataloader's collate function
52+
cgf1:
53+
_target_: sam3.eval.coco_writer.PredictionDumper
54+
iou_type: "segm"
55+
dump_dir: ${launcher.experiment_log_dir}/dumps/gold_attributes
56+
merge_predictions: True
57+
postprocessor: ${scratch.mask_postprocessor_thresholded}
58+
gather_pred_via_filesys: ${scratch.gather_pred_via_filesys}
59+
maxdets: 1000000 # no limit
60+
pred_file_evaluators:
61+
- _target_: sam3.eval.cgf1_eval.CGF1Evaluator
62+
gt_path: ${paths.coco_gts}
63+
iou_type: "bbox"
64+
- _target_: sam3.eval.cgf1_eval.CGF1Evaluator
65+
gt_path: ${paths.coco_gts}
66+
iou_type: "segm"

0 commit comments

Comments
 (0)