
Commit a1f9ff6

feat(transformers/pipelines): add object detection, zero shot classification, and zero shot object detection (#1193)
* feat(transformers/pipelines): add object detection, zero shot classification, and zero shot object detection
* fix bugs
* update to transformers v4.53
* add YOLOS and OWL-ViT models
* fixes
* fix iterator
1 parent 91a28f6 commit a1f9ff6
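For context, a minimal usage sketch of the new zero-shot object detection pipeline, assuming the mindone.transformers pipeline API mirrors its Hugging Face transformers counterpart; the task string, checkpoint, image URL, and result keys below are illustrative assumptions rather than values taken from this commit.

# Sketch only: assumes the mindone.transformers pipeline mirrors the HF
# transformers API; checkpoint, task name, and result keys are assumptions.
from mindone.transformers import pipeline

detector = pipeline(
    task="zero-shot-object-detection",
    model="google/owlvit-base-patch32",  # assumed OWL-ViT checkpoint
)
results = detector(
    "http://images.cocodataset.org/val2017/000000039769.jpg",
    candidate_labels=["cat", "remote control"],
)
for result in results:
    # Each detection is expected to carry a label, a confidence score, and a box.
    print(result["label"], round(result["score"], 3), result["box"])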

23 files changed, +6619 -64 lines changed

mindone/transformers/__init__.py

Lines changed: 10 additions & 0 deletions
@@ -476,6 +476,15 @@
     OPTModel,
     OPTPreTrainedModel,
 )
+from .models.owlvit import (
+    OwlViTForObjectDetection,
+    OwlViTImageProcessor,
+    OwlViTModel,
+    OwlViTPreTrainedModel,
+    OwlViTProcessor,
+    OwlViTTextModel,
+    OwlViTVisionModel,
+)
 from .models.paligemma import PaliGemmaForConditionalGeneration, PaliGemmaPreTrainedModel
 from .models.persimmon import (
     PersimmonForCausalLM,
@@ -638,6 +647,7 @@
     XLMRobertaXLModel,
     XLMRobertaXLPreTrainedModel,
 )
+from .models.yolos import YolosForObjectDetection, YolosImageProcessor, YolosModel, YolosPreTrainedModel
 from .pipelines import TextGenerationPipeline, pipeline
 from .processing_utils import ProcessorMixin
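These new exports also allow the YOLOS and OWL-ViT classes to be used directly. A rough sketch for YOLOS object detection, assuming the mindone classes keep the Hugging Face transformers interfaces (checkpoint name, tensor handling, and the post-processing call are assumptions):

# Sketch only: assumes the mindone YOLOS classes keep the HF transformers
# interfaces; checkpoint, return_tensors choice, and threshold are assumptions.
import mindspore as ms
import requests
from PIL import Image

from mindone.transformers import YolosForObjectDetection, YolosImageProcessor

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)

processor = YolosImageProcessor.from_pretrained("hustvl/yolos-tiny")
model = YolosForObjectDetection.from_pretrained("hustvl/yolos-tiny")

inputs = processor(images=image, return_tensors="np")
outputs = model(pixel_values=ms.Tensor(inputs["pixel_values"]))

# Convert raw logits/boxes into labeled detections above a confidence threshold.
detections = processor.post_process_object_detection(
    outputs, threshold=0.5, target_sizes=[image.size[::-1]]
)[0]
for label, score, box in zip(detections["labels"], detections["scores"], detections["boxes"]):
    print(model.config.id2label[int(label)], float(score), box)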

mindone/transformers/models/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -71,6 +71,7 @@
     mpt,
     mvp,
     opt,
+    owlvit,
     paligemma,
     persimmon,
     phi,
@@ -93,6 +94,7 @@
     vits,
     wav2vec2,
     xlm_roberta,
+    yolos,
 )
 
 if version.parse(transformers.__version__) >= version.parse("4.51.0"):

mindone/transformers/models/auto/configuration_auto.py

Lines changed: 4 additions & 0 deletions
@@ -93,6 +93,7 @@
         ("mt5", "MT5Config"),
         ("megatron-bert", "MegatronBertConfig"),
         ("mixtral", "MixtralConfig"),
+        ("owlvit", "OwlViTConfig"),
         ("paligemma", "PaliGemmaConfig"),
         ("phi", "PhiConfig"),
         ("phi3", "Phi3Config"),
@@ -120,6 +121,7 @@
         ("whisper", "WhisperConfig"),
         ("xlm-roberta", "XLMRobertaConfig"),
         ("xlm-roberta-xl", "XLMRobertaXLConfig"),
+        ("yolos", "YolosConfig"),
         ("cohere2", "Cohere2Config"),
     ]
 )
@@ -195,6 +197,7 @@
         ("megatron-bert", "Megatron-BERT"),
         ("mistral", "Mistral"),
         ("mixtral", "Mixtral"),
+        ("owlvit", "OWL-ViT"),
         ("paligemma", "PaliGemma"),
         ("phi", "Phi"),
         ("phi3", "Phi3"),
@@ -222,6 +225,7 @@
         ("opt", "OPT"),
         ("xlm-roberta", "XLM-RoBERTa"),
         ("xlm-roberta-xl", "XLM-RoBERTa-XL"),
+        ("yolos", "YOLOS"),
         ("cohere2", "Cohere2"),
     ]
 )

mindone/transformers/models/auto/image_processing_auto.py

Lines changed: 2 additions & 0 deletions
@@ -60,8 +60,10 @@
         ("llava_next", ("LlavaNextImageProcessor",)),
         ("llava_next_video", ("LlavaNextVideoImageProcessor",)),
         ("llava_onevision", ("LlavaOnevisionImageProcessor",)),
+        ("owlvit", ("OwlViTImageProcessor",)),
         ("segformer", ("SegformerImageProcessor",)),
         ("siglip", ("SiglipImageProcessor", "SiglipImageProcessorFast")),
+        ("yolos", ("YolosImageProcessor",)),
     ]
 )

mindone/transformers/models/auto/modeling_auto.py

Lines changed: 3 additions & 0 deletions
@@ -92,6 +92,7 @@
         ("mt5", "MT5Model"),
         ("megatron-bert", "MegatronBertModel"),
         ("mixtral", "MixtralModel"),
+        ("owlvit", "OwlViTModel"),
         ("phi", "PhiModel"),
         ("phi3", "Phi3Model"),
         ("qwen2", "Qwen2Model"),
@@ -114,6 +115,7 @@
         ("whisper", "WhisperModel"),
         ("xlm-roberta", "XLMRobertaModel"),
         ("xlm-roberta-xl", "XLMRobertaXLModel"),
+        ("yolos", "YolosModel"),
         ("cohere2", "Cohere2Model"),
     ]
 )
@@ -235,6 +237,7 @@
         ("segformer", "SegformerModel"),
         ("siglip_vision_model", "SiglipVisionModel"),
         ("vit", "ViTModel"),
+        ("yolos", "YolosModel"),
     ]
 )

mindone/transformers/models/auto/processing_auto.py

Lines changed: 1 addition & 0 deletions
@@ -55,6 +55,7 @@
         ("llava_next", "LlavaNextProcessor"),
         ("llava_next_video", "LlavaNextVideoProcessor"),
         ("llava_onevision", "LlavaOnevisionProcessor"),
+        ("owlvit", "OwlViTProcessor"),
         ("siglip", "SiglipProcessor"),
     ]
 )
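Together with the config and modeling mappings above, these registrations let OWL-ViT and YOLOS checkpoints resolve through the auto classes. A sketch, assuming mindone.transformers exposes the standard Auto entry points and that the checkpoint names below are suitable (both are assumptions):

# Sketch only: assumes AutoImageProcessor, AutoProcessor, and AutoModel are
# exported by mindone.transformers as in HF transformers; checkpoints are
# illustrative.
from mindone.transformers import AutoImageProcessor, AutoModel, AutoProcessor

# "yolos" now resolves to YolosConfig / YolosImageProcessor / YolosModel.
yolos_image_processor = AutoImageProcessor.from_pretrained("hustvl/yolos-tiny")
yolos_backbone = AutoModel.from_pretrained("hustvl/yolos-tiny")

# "owlvit" now resolves to OwlViTConfig / OwlViTProcessor / OwlViTModel.
owlvit_processor = AutoProcessor.from_pretrained("google/owlvit-base-patch32")
owlvit_model = AutoModel.from_pretrained("google/owlvit-base-patch32")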
mindone/transformers/models/owlvit/__init__.py

Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .image_processing_owlvit import *
+from .modeling_owlvit import *
+from .processing_owlvit import *
