|
| 1 | +# Copyright (c) Meta Platforms, Inc. and affiliates. |
| 2 | +# All rights reserved. |
| 3 | + |
| 4 | +# This source code is licensed under the license found in the |
| 5 | +# LICENSE file in the root directory of this source tree. |
| 6 | + |
| 7 | +import logging |
| 8 | +import os |
| 9 | + |
| 10 | +import torch |
| 11 | +from hydra import compose |
| 12 | +from hydra.utils import instantiate |
| 13 | +from omegaconf import OmegaConf |
| 14 | + |
| 15 | +import sam2 |
| 16 | + |
# Import-time guard: running Python from the parent directory of the sam2 repo
# (i.e. the directory the repo was cloned into) is not supported, because the
# repository folder could shadow the `sam2` Python package and cause issues.
_resolved_sam2_dir = sam2.__path__[0]
if os.path.isdir(os.path.join(_resolved_sam2_dir, "sam2")):
    # A nested "sam2/sam2" directory means the user is likely importing the repo
    # itself as "sam2" rather than the "sam2" Python package (the "sam2/sam2"
    # directory). This typically happens when Python is started from the parent
    # directory that contains the cloned sam2 repo.
    raise RuntimeError(
        "You're likely running Python from the parent directory of the sam2 repository "
        "(i.e. the directory where https://github.com/facebookresearch/sam2 is cloned into). "
        "This is not supported since the `sam2` Python package could be shadowed by the "
        "repository name (the repository is also named `sam2` and contains the Python package "
        "in `sam2/sam2`). Please run Python from another directory (e.g. from the repo dir "
        "rather than its parent dir, or from your home directory) after installing SAM 2."
    )
# Keep the module namespace free of the temporary used by the check above.
del _resolved_sam2_dir
| 33 | + |
| 34 | + |
# Lookup table: Hugging Face model id -> (config name, checkpoint filename).
# The first tuple element is the config name handed to the builders, the second
# is the checkpoint file requested from the Hub repo with that id.
HF_MODEL_ID_TO_FILENAMES = {
    # SAM 2 checkpoints
    "facebook/sam2-hiera-tiny": ("configs/sam2/sam2_hiera_t.yaml", "sam2_hiera_tiny.pt"),
    "facebook/sam2-hiera-small": ("configs/sam2/sam2_hiera_s.yaml", "sam2_hiera_small.pt"),
    "facebook/sam2-hiera-base-plus": (
        "configs/sam2/sam2_hiera_b+.yaml",
        "sam2_hiera_base_plus.pt",
    ),
    "facebook/sam2-hiera-large": ("configs/sam2/sam2_hiera_l.yaml", "sam2_hiera_large.pt"),
    # SAM 2.1 checkpoints
    "facebook/sam2.1-hiera-tiny": (
        "configs/sam2.1/sam2.1_hiera_t.yaml",
        "sam2.1_hiera_tiny.pt",
    ),
    "facebook/sam2.1-hiera-small": (
        "configs/sam2.1/sam2.1_hiera_s.yaml",
        "sam2.1_hiera_small.pt",
    ),
    "facebook/sam2.1-hiera-base-plus": (
        "configs/sam2.1/sam2.1_hiera_b+.yaml",
        "sam2.1_hiera_base_plus.pt",
    ),
    "facebook/sam2.1-hiera-large": (
        "configs/sam2.1/sam2.1_hiera_l.yaml",
        "sam2.1_hiera_large.pt",
    ),
}
| 69 | + |
| 70 | + |
def build_sam2(
    config_file,
    ckpt_path=None,
    device="cuda",
    mode="eval",
    hydra_overrides_extra=None,
    apply_postprocessing=True,
    **kwargs,
):
    """Build a SAM 2 model from a Hydra config and optionally load its weights.

    Args:
        config_file: Config name passed to ``hydra.compose``.
        ckpt_path: Checkpoint to load into the model. When ``None``, no
            weights are loaded.
        device: Device the instantiated model is moved to.
        mode: When equal to ``"eval"`` the model is switched to eval mode;
            any other value leaves the model's mode untouched.
        hydra_overrides_extra: Optional iterable of extra Hydra override
            strings. ``None`` means "no extra overrides". The passed object
            is never mutated.
        apply_postprocessing: When true, appends overrides that enable the
            dynamic multi-mask fallback in the mask decoder
            (``dynamic_multimask_via_stability`` with delta 0.05 and
            threshold 0.98).
        **kwargs: Accepted but not used by this function.

    Returns:
        The instantiated model, moved to ``device``.
    """
    # Work on a private list: this avoids the shared-mutable-default pitfall and
    # lets callers pass any iterable (list, tuple, ...) without it being modified.
    overrides = [] if hydra_overrides_extra is None else list(hydra_overrides_extra)

    if apply_postprocessing:
        overrides += [
            # dynamically fall back to multi-mask if the single mask is not stable
            "++model.sam_mask_decoder_extra_args.dynamic_multimask_via_stability=true",
            "++model.sam_mask_decoder_extra_args.dynamic_multimask_stability_delta=0.05",
            "++model.sam_mask_decoder_extra_args.dynamic_multimask_stability_thresh=0.98",
        ]

    # Read config and init model
    cfg = compose(config_name=config_file, overrides=overrides)
    OmegaConf.resolve(cfg)
    model = instantiate(cfg.model, _recursive_=True)
    _load_checkpoint(model, ckpt_path)
    model = model.to(device)
    if mode == "eval":
        model.eval()
    return model
| 98 | + |
| 99 | + |
def build_sam2_video_predictor(
    config_file,
    ckpt_path=None,
    device="cuda",
    mode="eval",
    hydra_overrides_extra=None,
    apply_postprocessing=True,
    vos_optimized=False,
    **kwargs,
):
    """Build a SAM 2 video predictor from a Hydra config and optionally load its weights.

    Args:
        config_file: Config name passed to ``hydra.compose``.
        ckpt_path: Checkpoint to load into the model. When ``None``, no
            weights are loaded.
        device: Device the instantiated model is moved to.
        mode: When equal to ``"eval"`` the model is switched to eval mode;
            any other value leaves the model's mode untouched.
        hydra_overrides_extra: Optional iterable of extra Hydra override
            strings, applied after the target-class override(s). ``None``
            means "no extra overrides". The passed object is never mutated.
        apply_postprocessing: When true, appends overrides for the dynamic
            multi-mask fallback, mask binarization for the memory encoder
            and small-hole filling (``fill_hole_area=8``).
        vos_optimized: When true, the model target is
            ``SAM2VideoPredictorVOS`` and ``compile_image_encoder`` is set;
            otherwise the target is ``SAM2VideoPredictor``.
        **kwargs: Accepted but not used by this function.

    Returns:
        The instantiated predictor, moved to ``device``.
    """
    if vos_optimized:
        hydra_overrides = [
            "++model._target_=sam2.sam2_video_predictor.SAM2VideoPredictorVOS",
            "++model.compile_image_encoder=True",  # Let sam2_base handle this
        ]
    else:
        hydra_overrides = [
            "++model._target_=sam2.sam2_video_predictor.SAM2VideoPredictor",
        ]

    # Work on a private list: this avoids the shared-mutable-default pitfall and
    # lets callers pass any iterable (list, tuple, ...) without it being modified.
    extra_overrides = (
        [] if hydra_overrides_extra is None else list(hydra_overrides_extra)
    )

    if apply_postprocessing:
        extra_overrides += [
            # dynamically fall back to multi-mask if the single mask is not stable
            "++model.sam_mask_decoder_extra_args.dynamic_multimask_via_stability=true",
            "++model.sam_mask_decoder_extra_args.dynamic_multimask_stability_delta=0.05",
            "++model.sam_mask_decoder_extra_args.dynamic_multimask_stability_thresh=0.98",
            # binarize the sigmoid mask logits on interacted frames with clicks in the
            # memory encoder so that the encoded masks are exactly what users see from clicking
            "++model.binarize_mask_from_pts_for_mem_enc=true",
            # fill small holes in the low-res masks up to `fill_hole_area`
            # (before resizing them to the original video resolution)
            "++model.fill_hole_area=8",
        ]
    # Extra overrides are applied whether or not post-processing is enabled.
    hydra_overrides.extend(extra_overrides)

    # Read config and init model
    cfg = compose(config_name=config_file, overrides=hydra_overrides)
    OmegaConf.resolve(cfg)
    model = instantiate(cfg.model, _recursive_=True)
    _load_checkpoint(model, ckpt_path)
    model = model.to(device)
    if mode == "eval":
        model.eval()
    return model
| 142 | + |
| 143 | + |
def _hf_download(model_id):
    """Download the checkpoint registered for ``model_id`` from the Hugging Face Hub.

    Args:
        model_id: A key of ``HF_MODEL_ID_TO_FILENAMES``.

    Returns:
        A ``(config_name, ckpt_path)`` tuple, where ``ckpt_path`` is the value
        returned by ``hf_hub_download``.

    Raises:
        KeyError: If ``model_id`` is not a key of ``HF_MODEL_ID_TO_FILENAMES``.
    """
    # Function-scope import: huggingface_hub is imported only when this helper runs.
    from huggingface_hub import hf_hub_download

    cfg_name, ckpt_filename = HF_MODEL_ID_TO_FILENAMES[model_id]
    downloaded_ckpt = hf_hub_download(repo_id=model_id, filename=ckpt_filename)
    return cfg_name, downloaded_ckpt
| 150 | + |
| 151 | + |
def build_sam2_hf(model_id, **kwargs):
    """Build a SAM 2 model from a Hugging Face model id.

    Resolves ``model_id`` to a config name and a downloaded checkpoint via
    ``_hf_download`` and forwards both, together with ``**kwargs``, to
    ``build_sam2``.
    """
    cfg_name, downloaded_ckpt = _hf_download(model_id)
    model = build_sam2(config_file=cfg_name, ckpt_path=downloaded_ckpt, **kwargs)
    return model
| 155 | + |
| 156 | + |
def build_sam2_video_predictor_hf(model_id, **kwargs):
    """Build a SAM 2 video predictor from a Hugging Face model id.

    Resolves ``model_id`` to a config name and a downloaded checkpoint via
    ``_hf_download`` and forwards both, together with ``**kwargs``, to
    ``build_sam2_video_predictor``.
    """
    cfg_name, downloaded_ckpt = _hf_download(model_id)
    predictor = build_sam2_video_predictor(
        config_file=cfg_name,
        ckpt_path=downloaded_ckpt,
        **kwargs,
    )
    return predictor
| 162 | + |
| 163 | + |
def _load_checkpoint(model, ckpt_path):
    """Load the ``"model"`` state dict stored at ``ckpt_path`` into ``model``.

    Args:
        model: Object exposing ``load_state_dict(state_dict)`` that returns a
            ``(missing_keys, unexpected_keys)`` pair.
        ckpt_path: Path of the checkpoint file, or ``None`` to skip loading.

    Raises:
        RuntimeError: If ``load_state_dict`` reports missing or unexpected
            keys. The message names the checkpoint and the offending keys.
    """
    if ckpt_path is None:
        return

    # The checkpoint is read onto the CPU; the weights live under the "model" key.
    state_dict = torch.load(ckpt_path, map_location="cpu", weights_only=True)["model"]
    missing_keys, unexpected_keys = model.load_state_dict(state_dict)
    if missing_keys:
        logging.error(missing_keys)
        raise RuntimeError(
            f"Missing keys when loading checkpoint {ckpt_path!r}: {missing_keys}"
        )
    if unexpected_keys:
        logging.error(unexpected_keys)
        raise RuntimeError(
            f"Unexpected keys when loading checkpoint {ckpt_path!r}: {unexpected_keys}"
        )
    logging.info("Loaded checkpoint successfully")
0 commit comments