Skip to content

Commit 60ebf55

Browse files
committed
- 添加了对sam2.1的支持
1 parent 5233ff4 commit 60ebf55

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

65 files changed

+1449 / -7796 lines changed

ISAT/icons.qrc

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,7 @@
11
<RCC>
22
<qresource prefix="icon">
3+
<file>../icons/关闭-小_close-small.svg</file>
4+
<file>../icons/校验-小_check-small.svg</file>
35
<file>../icons/play-1.svg</file>
46
<file>../icons/play-5.svg</file>
57
<file>../icons/play-all.svg</file>

ISAT/icons_rc.py

Lines changed: 148 additions & 90 deletions
Large diffs are not rendered by default.

ISAT/segment_any/model_zoo.py

Lines changed: 93 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,9 @@
1010
],
1111
'memory': '534M',
1212
'bf16_memory': '390M',
13-
'params': '40M'
13+
'params': '40M',
14+
'image_segment': True,
15+
'video_segment': False,
1416
},
1517
'sam_hq_vit_h.pth':
1618
{
@@ -20,7 +22,9 @@
2022
],
2123
'memory': '6464M',
2224
'bf16_memory': '3378M',
23-
'params': '2.6G'
25+
'params': '2.6G',
26+
'image_segment': True,
27+
'video_segment': False,
2428
},
2529
'sam_hq_vit_l.pth':
2630
{
@@ -30,7 +34,9 @@
3034
],
3135
'memory': '5016M',
3236
'bf16_memory': '2634M',
33-
'params': '1.3G'
37+
'params': '1.3G',
38+
'image_segment': True,
39+
'video_segment': False,
3440
},
3541
'sam_hq_vit_b.pth':
3642
{
@@ -40,7 +46,9 @@
4046
],
4147
'memory': '3304M',
4248
'bf16_memory': '1762M',
43-
'params': '379M'
49+
'params': '379M',
50+
'image_segment': True,
51+
'video_segment': False,
4452
},
4553
'sam_hq_vit_tiny.pth':
4654
{
@@ -50,7 +58,9 @@
5058
],
5159
'memory': '598M',
5260
'bf16_memory': '392M',
53-
'params': '43M'
61+
'params': '43M',
62+
'image_segment': True,
63+
'video_segment': False,
5464
},
5565
'sam_vit_h_4b8939.pth':
5666
{
@@ -60,7 +70,9 @@
6070
],
6171
'memory': '6462M',
6272
'bf16_memory': '3378M',
63-
'params': '2.6G'
73+
'params': '2.6G',
74+
'image_segment': True,
75+
'video_segment': False,
6476
},
6577
'sam_vit_l_0b3195.pth':
6678
{
@@ -70,7 +82,9 @@
7082
],
7183
'memory': '5016M',
7284
'bf16_memory': '2634M',
73-
'params': '1.3G'
85+
'params': '1.3G',
86+
'image_segment': True,
87+
'video_segment': False,
7488
},
7589
'sam_vit_b_01ec64.pth':
7690
{
@@ -80,7 +94,9 @@
8094
],
8195
'memory': '3302M',
8296
'bf16_memory': '1760M',
83-
'params': '375M'
97+
'params': '375M',
98+
'image_segment': True,
99+
'video_segment': False,
84100
},
85101
'edge_sam.pth':
86102
{
@@ -90,7 +106,9 @@
90106
],
91107
'memory': '360M',
92108
'bf16_memory': '304M',
93-
'params': '38.8M'
109+
'params': '38.8M',
110+
'image_segment': True,
111+
'video_segment': False,
94112
},
95113
'edge_sam_3x.pth':
96114
{
@@ -100,7 +118,9 @@
100118
],
101119
'memory': '360M',
102120
'bf16_memory': '304M',
103-
'params': '38.8M'
121+
'params': '38.8M',
122+
'image_segment': True,
123+
'video_segment': False,
104124
},
105125
'sam-med2d_b.pth':
106126
{
@@ -110,7 +130,9 @@
110130
],
111131
'memory': '1500M',
112132
'bf16_memory': '1050M',
113-
'params': '2.4G'
133+
'params': '2.4G',
134+
'image_segment': True,
135+
'video_segment': False,
114136
},
115137
'sam2_hiera_large.pt':
116138
{
@@ -120,7 +142,9 @@
120142
],
121143
'memory': '4000M',
122144
'bf16_memory': '2800M',
123-
'params': '900M'
145+
'params': '900M',
146+
'image_segment': True,
147+
'video_segment': True,
124148
},
125149
'sam2_hiera_base_plus.pt':
126150
{
@@ -130,7 +154,9 @@
130154
],
131155
'memory': '2800M',
132156
'bf16_memory': '2200M',
133-
'params': '324M'
157+
'params': '324M',
158+
'image_segment': True,
159+
'video_segment': True,
134160
},
135161
'sam2_hiera_small.pt':
136162
{
@@ -140,7 +166,9 @@
140166
],
141167
'memory': '2500M',
142168
'bf16_memory': '1800M',
143-
'params': '185M'
169+
'params': '185M',
170+
'image_segment': True,
171+
'video_segment': True,
144172
},
145173
'sam2_hiera_tiny.pt':
146174
{
@@ -150,6 +178,56 @@
150178
],
151179
'memory': '2200M',
152180
'bf16_memory': '1500M',
153-
'params': '156M'
181+
'params': '156M',
182+
'image_segment': True,
183+
'video_segment': True,
184+
},
185+
'sam2.1_hiera_large.pt':
186+
{
187+
'urls': [
188+
'https://huggingface.co/yatengLG/ISAT_with_segment_anything_checkpoints/resolve/main/sam2.1_hiera_large.pt',
189+
'https://www.modelscope.cn/api/v1/models/yatengLG/ISAT_with_segment_anything_checkpoints/repo?Revision=master&FilePath=checkpoints/sam2.1_hiera_large.pt'
190+
],
191+
'memory': '4000M',
192+
'bf16_memory': '2800M',
193+
'params': '900M',
194+
'image_segment': True,
195+
'video_segment': True,
196+
},
197+
'sam2.1_hiera_base_plus.pt':
198+
{
199+
'urls': [
200+
'https://huggingface.co/yatengLG/ISAT_with_segment_anything_checkpoints/resolve/main/sam2.1_hiera_base_plus.pt',
201+
'https://www.modelscope.cn/api/v1/models/yatengLG/ISAT_with_segment_anything_checkpoints/repo?Revision=master&FilePath=checkpoints/sam2.1_hiera_base_plus.pt'
202+
],
203+
'memory': '2800M',
204+
'bf16_memory': '2200M',
205+
'params': '324M',
206+
'image_segment': True,
207+
'video_segment': True,
208+
},
209+
'sam2.1_hiera_small.pt':
210+
{
211+
'urls': [
212+
'https://huggingface.co/yatengLG/ISAT_with_segment_anything_checkpoints/resolve/main/sam2.1_hiera_small.pt',
213+
'https://www.modelscope.cn/api/v1/models/yatengLG/ISAT_with_segment_anything_checkpoints/repo?Revision=master&FilePath=checkpoints/sam2.1_hiera_small.pt'
214+
],
215+
'memory': '2500M',
216+
'bf16_memory': '1800M',
217+
'params': '185M',
218+
'image_segment': True,
219+
'video_segment': True,
220+
},
221+
'sam2.1_hiera_tiny.pt':
222+
{
223+
'urls': [
224+
'https://huggingface.co/yatengLG/ISAT_with_segment_anything_checkpoints/resolve/main/sam2.1_hiera_tiny.pt',
225+
'https://www.modelscope.cn/api/v1/models/yatengLG/ISAT_with_segment_anything_checkpoints/repo?Revision=master&FilePath=checkpoints/sam2.1_hiera_tiny.pt'
226+
],
227+
'memory': '2200M',
228+
'bf16_memory': '1500M',
229+
'params': '156M',
230+
'image_segment': True,
231+
'video_segment': True,
154232
},
155233
}

ISAT/segment_any/sam2/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,5 +5,7 @@
55
# LICENSE file in the root directory of this source tree.
66

77
from hydra import initialize_config_module
8+
from hydra.core.global_hydra import GlobalHydra
89

9-
initialize_config_module("ISAT/segment_any/sam2/configs", version_base="1.2")
10+
if not GlobalHydra.instance().is_initialized():
11+
initialize_config_module("ISAT/segment_any/sam2/configs", version_base="1.2")

ISAT/segment_any/sam2/automatic_mask_generator.py

Lines changed: 21 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -53,6 +53,7 @@ def __init__(
5353
output_mode: str = "binary_mask",
5454
use_m2m: bool = False,
5555
multimask_output: bool = True,
56+
**kwargs,
5657
) -> None:
5758
"""
5859
Using a SAM 2 model, generates masks for the entire image.
@@ -148,6 +149,23 @@ def __init__(
148149
self.use_m2m = use_m2m
149150
self.multimask_output = multimask_output
150151

152+
@classmethod
153+
def from_pretrained(cls, model_id: str, **kwargs) -> "SAM2AutomaticMaskGenerator":
154+
"""
155+
Load a pretrained SAM 2 model from the Hugging Face hub and wrap it in a mask generator.
156+
157+
Arguments:
158+
model_id (str): The Hugging Face repository ID, passed to `build_sam2_hf`.
159+
**kwargs: Additional arguments, forwarded unchanged to both `build_sam2_hf` and the class constructor.
160+
161+
Returns:
162+
(SAM2AutomaticMaskGenerator): An instance of `cls` wrapping the loaded model.
163+
"""
164+
from ISAT.segment_any.sam2.build_sam import build_sam2_hf
165+
166+
sam_model = build_sam2_hf(model_id, **kwargs)
167+
return cls(sam_model, **kwargs)
168+
151169
@torch.no_grad()
152170
def generate(self, image: np.ndarray) -> List[Dict[str, Any]]:
153171
"""
@@ -284,7 +302,9 @@ def _process_batch(
284302
orig_h, orig_w = orig_size
285303

286304
# Run model on this batch
287-
points = torch.as_tensor(points, device=self.predictor.device)
305+
points = torch.as_tensor(
306+
points, dtype=torch.float32, device=self.predictor.device
307+
)
288308
in_points = self.predictor._transforms.transform_coords(
289309
points, normalize=normalize, orig_hw=im_size
290310
)

0 commit comments

Comments
 (0)