Skip to content

Commit b5436dd

Browse files
authored
feat(transformers) Pipeline class-classification; Model ViT (#1178)
* init pipelines `image-classification`, `image-segmentation`
* add vit for img_cls pipeline testing
* debug, passed vit generate
* passed pipeline; add ut
* debug; vit passed ut
* fix bug for img classification
* add copyright
* update after merging master
* delete jit decorator
1 parent 1450bf2 commit b5436dd

File tree

10 files changed

+1385
-1
lines changed

10 files changed

+1385
-1
lines changed

mindone/transformers/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -493,6 +493,7 @@
493493
UMT5Model,
494494
UMT5PreTrainedModel,
495495
)
496+
from .models.vit import ViTForImageClassification, ViTForMaskedImageModeling, ViTModel, ViTPreTrainedModel
496497
from .models.vits import VitsModel, VitsPreTrainedModel
497498
from .models.wav2vec2 import (
498499
Wav2Vec2FeatureExtractor,

mindone/transformers/models/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -84,6 +84,7 @@
8484
switch_transformers,
8585
t5,
8686
umt5,
87+
vit,
8788
vits,
8889
wav2vec2,
8990
xlm_roberta,

mindone/transformers/models/auto/configuration_auto.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -107,6 +107,7 @@
107107
("speecht5", "SpeechT5Config"),
108108
("t5", "T5Config"),
109109
("umt5", "UMT5Config"),
110+
("vit", "ViTConfig"),
110111
("wav2vec2", "Wav2Vec2Config"),
111112
("mvp", "MvpConfig"),
112113
("whisper", "WhisperConfig"),
@@ -200,6 +201,7 @@
200201
("t5", "T5"),
201202
("t5v1.1", "T5v1.1"),
202203
("umt5", "UMT5"),
204+
("vit", "ViT"),
203205
("wav2vec2", "Wav2Vec2"),
204206
("whisper", "Whisper"),
205207
("convbert", "ConvBERT"),

mindone/transformers/models/auto/modeling_auto.py

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -103,6 +103,7 @@
103103
("speecht5", "SpeechT5Model"),
104104
("t5", "T5Model"),
105105
("umt5", "UMT5Model"),
106+
("vit", "ViTModel"),
106107
("wav2vec2", "Wav2Vec2Model"),
107108
("whisper", "WhisperModel"),
108109
("xlm-roberta", "XLMRobertaModel"),
@@ -223,10 +224,15 @@
223224
("imagegpt", "ImageGPTModel"),
224225
("levit", "LevitModel"),
225226
("siglip_vision_model", "SiglipVisionModel"),
227+
("vit", "ViTModel"),
226228
]
227229
)
228230

229-
MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING_NAMES = OrderedDict()
231+
MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING_NAMES = OrderedDict(
232+
[
233+
("vit", "ViTForMaskedImageModeling"),
234+
]
235+
)
230236

231237

232238
MODEL_FOR_CAUSAL_IMAGE_MODELING_MAPPING_NAMES = OrderedDict(
@@ -248,6 +254,7 @@
248254
("LevitForImageClassification", "LevitForImageClassificationWithTeacher"),
249255
),
250256
("siglip", "SiglipForImageClassification"),
257+
("vit", "ViTForImageClassification"),
251258
]
252259
)
253260

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,17 @@
1+
# Copyright 2024 The HuggingFace Team. All rights reserved.
2+
#
3+
# This code is adapted from https://github.com/huggingface/transformers
4+
# with modifications to run transformers on mindspore.
5+
#
6+
# Licensed under the Apache License, Version 2.0 (the "License");
7+
# you may not use this file except in compliance with the License.
8+
# You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing, software
13+
# distributed under the License is distributed on an "AS IS" BASIS,
14+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
# See the License for the specific language governing permissions and
16+
# limitations under the License.
17+
from .modeling_vit import *

0 commit comments

Comments (0)