Skip to content

Commit 090fcbb

Browse files
authored
feat(transformers): add SmolVLM (#1093)
* add SmolVLM
* resolve comments
* resolve comments
* resolve comments
* add copyright
1 parent 17f9d2e commit 090fcbb

File tree

10 files changed

+1380
-3
lines changed

10 files changed

+1380
-3
lines changed

mindone/transformers/__init__.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
AutoImageProcessor,
5555
AutoModel,
5656
AutoModelForCausalLM,
57+
AutoModelForImageTextToText,
5758
AutoModelForMaskedLM,
5859
AutoProcessor,
5960
)
@@ -436,6 +437,12 @@
436437
SiglipTextModel,
437438
SiglipVisionModel,
438439
)
440+
from .models.smolvlm import (
441+
SmolVLMForConditionalGeneration,
442+
SmolVLMModel,
443+
SmolVLMPreTrainedModel,
444+
SmolVLMVisionTransformer,
445+
)
439446
from .models.speecht5 import (
440447
SpeechT5ForSpeechToSpeech,
441448
SpeechT5ForSpeechToText,

mindone/transformers/models/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@
7474
qwen2_vl,
7575
roberta,
7676
siglip,
77+
smolvlm,
7778
speecht5,
7879
starcoder2,
7980
switch_transformers,

mindone/transformers/models/auto/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,5 +17,5 @@
1717
from .configuration_auto import AutoConfig
1818
from .feature_extraction_auto import AutoFeatureExtractor
1919
from .image_processing_auto import AutoImageProcessor
20-
from .modeling_auto import AutoModel, AutoModelForCausalLM, AutoModelForMaskedLM
20+
from .modeling_auto import AutoModel, AutoModelForCausalLM, AutoModelForImageTextToText, AutoModelForMaskedLM
2121
from .processing_auto import AutoProcessor

mindone/transformers/models/auto/configuration_auto.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,8 @@
100100
("rembert", "RemBertConfig"),
101101
("siglip", "SiglipConfig"),
102102
("siglip_vision_model", "SiglipVisionConfig"),
103+
("smolvlm", "SmolVLMConfig"),
104+
("smolvlm_vision", "SmolVLMVisionConfig"),
103105
("speecht5", "SpeechT5Config"),
104106
("t5", "T5Config"),
105107
("umt5", "UMT5Config"),
@@ -189,6 +191,8 @@
189191
("rembert", "RemBERT"),
190192
("siglip", "SigLIP"),
191193
("siglip_vision_model", "SiglipVisionModel"),
194+
("smolvlm", "SmolVLM"),
195+
("smolvlm_vision", "SmolVLMVisionTransformer"),
192196
("speecht5", "SpeechT5"),
193197
("t5", "T5"),
194198
("t5v1.1", "T5v1.1"),
@@ -249,6 +253,7 @@
249253
("clip_text_model", "clip"),
250254
("aria_text", "aria"),
251255
("siglip_vision_model", "siglip"),
256+
("smolvlm_vision", "smolvlm"),
252257
("chinese_clip_vision_model", "chinese_clip"),
253258
("rt_detr_resnet", "rt_detr"),
254259
]

mindone/transformers/models/auto/modeling_auto.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,8 @@
9797
("rembert", "RemBertModel"),
9898
("siglip", "SiglipModel"),
9999
("siglip_vision_model", "SiglipVisionModel"),
100+
("smolvlm", "SmolVLMModel"),
101+
("smolvlm_vision", "SmolVLMVisionTransformer"),
100102
("speecht5", "SpeechT5Model"),
101103
("t5", "T5Model"),
102104
("umt5", "UMT5Model"),
@@ -290,6 +292,7 @@
290292
("paligemma", "PaliGemmaForConditionalGeneration"),
291293
("qwen2_5_vl", "Qwen2_5_VLForConditionalGeneration"),
292294
("qwen2_vl", "Qwen2VLForConditionalGeneration"),
295+
("smolvlm", "SmolVLMForConditionalGeneration"),
293296
]
294297
)
295298

mindone/transformers/models/smolvlm/__init__.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# coding=utf-8
2+
# Copyright 2025 the HuggingFace Inc. team. All rights reserved.
3+
# Written by Orr Zohar
4+
#
5+
# This code is adapted from https://github.com/huggingface/transformers
6+
# with modifications to run transformers on mindspore.
7+
#
8+
# Licensed under the Apache License, Version 2.0 (the "License");
9+
# you may not use this file except in compliance with the License.
10+
# You may obtain a copy of the License at
11+
#
12+
# http://www.apache.org/licenses/LICENSE-2.0
13+
#
14+
# Unless required by applicable law or agreed to in writing, software
15+
# distributed under the License is distributed on an "AS IS" BASIS,
16+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17+
# See the License for the specific language governing permissions and
18+
# limitations under the License.
19+
20+
from .modeling_smolvlm import *

0 commit comments

Comments
 (0)