Skip to content

Commit c212a03

Browse files
authored
Add ViT-bigG model implementation (#93)
- Initialized mlcd_rope2d module to support ViT-bigG architecture.
1 parent dad7666, commit c212a03

File tree

7 files changed

+1647
-1
lines changed

7 files changed

+1647
-1
lines changed

llava/model/multimodal_encoder/builder.py

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
from .hf_vision import HFVisionTower
66
from .siglip_encoder import SigLipVisionTower
77
from .clip_encoder import CLIPVisionTower, CLIPVisionTowerS2
8+
from .mlcd_encoder import MLCDVisionTower, MLCDVisionTowerS2
89

910
# from .eva_clip.eva_clip_encoder import EvaClipVisionTower
1011
# from .dev_eva_clip.eva_vit import EvaViTWrapper
@@ -14,12 +15,20 @@ def build_vision_tower(vision_tower_cfg, **kwargs):
1415
vision_tower = getattr(vision_tower_cfg, "mm_vision_tower", getattr(vision_tower_cfg, "vision_tower", None))
1516
is_absolute_path_exists = os.path.exists(vision_tower)
1617
use_s2 = getattr(vision_tower_cfg, "s2", False)
17-
if "clip" in vision_tower or "mlcd" in vision_tower or "unicom" in vision_tower or vision_tower.startswith("openai") \
18+
19+
if "mlcd-vit-bigG-patch14-336" in vision_tower:
20+
if use_s2:
21+
return MLCDVisionTowerS2(vision_tower, args=vision_tower_cfg, **kwargs)
22+
else:
23+
return MLCDVisionTower(vision_tower, args=vision_tower_cfg, **kwargs)
24+
25+
elif "clip" in vision_tower or "mlcd" in vision_tower or "unicom" in vision_tower or vision_tower.startswith("openai") \
1826
or vision_tower.startswith("laion") or "ShareGPT4V" in vision_tower or vision_tower.startswith("DeepGlint"):
1927
if use_s2:
2028
return CLIPVisionTowerS2(vision_tower, args=vision_tower_cfg, **kwargs)
2129
else:
2230
return CLIPVisionTower(vision_tower, args=vision_tower_cfg, **kwargs)
31+
2332
elif "siglip" in vision_tower:
2433
return SigLipVisionTower(vision_tower, vision_tower_cfg=vision_tower_cfg, **kwargs)
2534
elif vision_tower.startswith("hf:"):

0 commit comments

Comments
 (0)