Skip to content

Commit b5436dd

Browse files
authored
feat(transformers) Pipeline class-classification; Model ViT (#1178)
* init pipelines `image-classification`, `image-segmentation`
* add vit for img_cls pipeline testing
* debug, passed vit generate
* passed pipeline; add ut
* debug; vit passed ut
* fix bug for img classification
* add copyright
* update after merging master
* delete jit decorator
1 parent 1450bf2 commit b5436dd

File tree

10 files changed

+1385
-1
lines changed

10 files changed

+1385
-1
lines changed

mindone/transformers/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -493,6 +493,7 @@
493493
UMT5Model,
494494
UMT5PreTrainedModel,
495495
)
496+
from .models.vit import ViTForImageClassification, ViTForMaskedImageModeling, ViTModel, ViTPreTrainedModel
496497
from .models.vits import VitsModel, VitsPreTrainedModel
497498
from .models.wav2vec2 import (
498499
Wav2Vec2FeatureExtractor,

mindone/transformers/models/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -84,6 +84,7 @@
8484
switch_transformers,
8585
t5,
8686
umt5,
87+
vit,
8788
vits,
8889
wav2vec2,
8990
xlm_roberta,

mindone/transformers/models/auto/configuration_auto.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -107,6 +107,7 @@
107107
("speecht5", "SpeechT5Config"),
108108
("t5", "T5Config"),
109109
("umt5", "UMT5Config"),
110+
("vit", "ViTConfig"),
110111
("wav2vec2", "Wav2Vec2Config"),
111112
("mvp", "MvpConfig"),
112113
("whisper", "WhisperConfig"),
@@ -200,6 +201,7 @@
200201
("t5", "T5"),
201202
("t5v1.1", "T5v1.1"),
202203
("umt5", "UMT5"),
204+
("vit", "ViT"),
203205
("wav2vec2", "Wav2Vec2"),
204206
("whisper", "Whisper"),
205207
("convbert", "ConvBERT"),

mindone/transformers/models/auto/modeling_auto.py

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -103,6 +103,7 @@
103103
("speecht5", "SpeechT5Model"),
104104
("t5", "T5Model"),
105105
("umt5", "UMT5Model"),
106+
("vit", "ViTModel"),
106107
("wav2vec2", "Wav2Vec2Model"),
107108
("whisper", "WhisperModel"),
108109
("xlm-roberta", "XLMRobertaModel"),
@@ -223,10 +224,15 @@
223224
("imagegpt", "ImageGPTModel"),
224225
("levit", "LevitModel"),
225226
("siglip_vision_model", "SiglipVisionModel"),
227+
("vit", "ViTModel"),
226228
]
227229
)
228230

229-
MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING_NAMES = OrderedDict()
231+
MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING_NAMES = OrderedDict(
232+
[
233+
("vit", "ViTForMaskedImageModeling"),
234+
]
235+
)
230236

231237

232238
MODEL_FOR_CAUSAL_IMAGE_MODELING_MAPPING_NAMES = OrderedDict(
@@ -248,6 +254,7 @@
248254
("LevitForImageClassification", "LevitForImageClassificationWithTeacher"),
249255
),
250256
("siglip", "SiglipForImageClassification"),
257+
("vit", "ViTForImageClassification"),
251258
]
252259
)
253260

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,17 @@
1+
# Copyright 2024 The HuggingFace Team. All rights reserved.
2+
#
3+
# This code is adapted from https://github.com/huggingface/transformers
4+
# with modifications to run transformers on mindspore.
5+
#
6+
# Licensed under the Apache License, Version 2.0 (the "License");
7+
# you may not use this file except in compliance with the License.
8+
# You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing, software
13+
# distributed under the License is distributed on an "AS IS" BASIS,
14+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
# See the License for the specific language governing permissions and
16+
# limitations under the License.
17+
from .modeling_vit import *

0 commit comments

Comments (0)