@@ -32,7 +32,6 @@
 
 from huggingface_hub import model_info
 from huggingface_hub.constants import HF_HUB_OFFLINE
-from PIL import Image
 
 import transformers
 from transformers.models.auto.modeling_auto import (
@@ -45,6 +44,7 @@
     is_openai_available,
     is_pydantic_available,
     is_uvicorn_available,
+    is_vision_available,
 )
 
 from .. import (
@@ -54,7 +54,6 @@
     ProcessorMixin,
     TextIteratorStreamer,
 )
-from ..generation.continuous_batching import ContinuousBatchingManager, RequestStatus
 from ..utils import is_torch_available, logging
 from . import BaseTransformersCLICommand
 
@@ -69,9 +68,14 @@
         PreTrainedModel,
     )
 
+    from ..generation.continuous_batching import ContinuousBatchingManager, RequestStatus
+
 if is_librosa_available():
     import librosa
 
+if is_vision_available():
+    from PIL import Image
+
 serve_dependencies_available = (
     is_pydantic_available() and is_fastapi_available() and is_uvicorn_available() and is_openai_available()
 )
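
The import hunks above all apply the same optional-dependency pattern: anything that needs torch, Pillow, or librosa is only imported behind the matching `is_*_available()` check, so the module itself stays importable when an extra is missing. A minimal, self-contained sketch of that pattern; the `is_vision_available` stand-in and the `describe_image` helper are illustrative, not part of this PR:

```python
import importlib.util


def is_vision_available() -> bool:
    # Mirrors the spirit of transformers' availability checks:
    # report whether the optional dependency can be imported at all.
    return importlib.util.find_spec("PIL") is not None


if is_vision_available():
    from PIL import Image  # only bound when Pillow is installed


def describe_image(path: str) -> str:
    # Guard the feature, not just the import: callers get a clear
    # error instead of a NameError on the unbound `Image` symbol.
    if not is_vision_available():
        raise ImportError("Pillow is required for image inputs; `pip install Pillow`")
    with Image.open(path) as im:
        return f"{im.format} {im.size[0]}x{im.size[1]}"
```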
@@ -811,7 +815,7 @@ def stream_chat_completion(_inputs):
         return stream_chat_completion(inputs[0])
 
     @staticmethod
-    def get_model_modality(model: PreTrainedModel) -> Modality:
+    def get_model_modality(model: "PreTrainedModel") -> Modality:
         model_classname = model.__class__.__name__
         if model_classname in MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES.values():
             modality = Modality.VLM
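
Quoting the annotation (`"PreTrainedModel"`) turns it into a forward reference: Python stores it as a string rather than evaluating the name when the function is defined, so the module still imports when torch is absent and `PreTrainedModel` was never bound, while static type checkers resolve the string as usual. A minimal sketch of the same idea using the standard `typing.TYPE_CHECKING` guard; `heavy_framework` and `HeavyModel` are hypothetical stand-ins:

```python
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Only evaluated by static type checkers (mypy, pyright),
    # never at runtime, so this import cannot fail in production.
    from heavy_framework import HeavyModel  # hypothetical optional dependency


def run(model: "HeavyModel") -> str:
    # The quoted annotation is kept as a plain string at runtime;
    # nothing is imported unless a type checker asks about it.
    return model.__class__.__name__
```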
@@ -1545,7 +1549,9 @@ def _load_model_and_data_processor(self, model_id_and_revision: str):
         logger.info(f"Loaded model {model_id_and_revision}")
         return model, data_processor
 
-    def load_model_and_processor(self, model_id_and_revision: str) -> tuple[PreTrainedModel, PreTrainedTokenizerFast]:
+    def load_model_and_processor(
+        self, model_id_and_revision: str
+    ) -> tuple["PreTrainedModel", PreTrainedTokenizerFast]:
         """
         Loads the text model and processor from the given model ID and revision into the ServeCommand instance.
 
@@ -1570,7 +1576,7 @@ def load_model_and_processor(self, model_id_and_revision: str) -> tuple[PreTrainedModel, PreTrainedTokenizerFast]:
 
         return model, processor
 
-    def load_audio_model_and_processor(self, model_id_and_revision: str) -> tuple[PreTrainedModel, ProcessorMixin]:
+    def load_audio_model_and_processor(self, model_id_and_revision: str) -> tuple["PreTrainedModel", ProcessorMixin]:
         """
         Loads the audio model and processor from the given model ID and revision into the ServeCommand instance.
 
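
A quick sanity check for a change like this is to import the CLI module in an environment without the optional extras installed (assuming the module path below matches the repository layout):

```python
# Run in a virtualenv without torch or Pillow. With the guarded imports
# above, the module should import cleanly instead of raising ImportError
# at import time.
import transformers.commands.serving  # noqa: F401
```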