24 | 24 | from typing import Any, Callable, Optional, Union |
25 | 25 |
26 | 26 | import numpy as np |
27 | | -import pandas as pd |
28 | 27 | from PIL import Image |
29 | 28 | from transformers import PreTrainedTokenizerBase |
30 | 29 |
33 | 32 | from vllm.multimodal import MultiModalDataDict |
34 | 33 | from vllm.multimodal.image import convert_image_mode |
35 | 34 | from vllm.transformers_utils.tokenizer import AnyTokenizer, get_lora_tokenizer |
| 35 | +from vllm.utils import PlaceholderModule |
| 36 | + |
| 37 | +try: |
| 38 | + from datasets import load_dataset |
| 39 | +except ImportError: |
| 40 | + datasets = PlaceholderModule("datasets") |
| 41 | + load_dataset = datasets.placeholder_attr("load_dataset") |
| 42 | + |
| 43 | +try: |
| 44 | + import pandas as pd |
| 45 | +except ImportError: |
| 46 | + pd = PlaceholderModule("pandas") |
| 47 | + |
| 48 | +try: |
| 49 | + import librosa |
| 50 | +except ImportError: |
| 51 | + librosa = PlaceholderModule("librosa") |
36 | 52 |
37 | 53 | logger = logging.getLogger(__name__) |
38 | 54 |
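
Note: PlaceholderModule (imported above from vllm.utils) defers the ImportError from import time to first use, so this module stays importable when an optional dependency is missing. A minimal sketch of the idea, assuming nothing about the real vllm.utils implementation beyond the two behaviors exercised above:

    # Minimal sketch of the deferred-import pattern; the real class in
    # vllm.utils may differ in its details.
    class PlaceholderModule:
        def __init__(self, name: str) -> None:
            self._name = name

        def placeholder_attr(self, attr: str) -> "PlaceholderModule":
            # Stand in for one attribute (e.g. load_dataset) so the error
            # names the missing package when the attribute is used.
            return PlaceholderModule(f"{self._name}.{attr}")

        def _fail(self):
            pkg = self._name.split(".")[0]
            raise ImportError(f"'{self._name}' requires the '{pkg}' "
                              f"package; `pip install {pkg}` to use it.")

        def __getattr__(self, attr: str):
            self._fail()

        def __call__(self, *args, **kwargs):
            self._fail()

Any attribute access or call on the placeholder raises with an install hint, which is why the per-method try/except blocks removed in the hunks below become redundant.
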
@@ -636,13 +652,6 @@ def load_data(self, ): |
636 | 652 | if self.dataset_path is None: |
637 | 653 | raise ValueError("dataset_path must be provided for loading data.") |
638 | 654 |
639 | | - try: |
640 | | - import pandas as pd |
641 | | - except ImportError as e: |
642 | | - raise ImportError( |
643 | | - "Pandas is required for BurstGPTDataset. Please install it " |
644 | | - "using `pip install pandas`.") from e |
645 | | - |
646 | 655 | df = pd.read_csv(self.dataset_path) |
647 | 656 | # Filter to keep only GPT-4 rows. |
648 | 657 | gpt4_df = df[df["Model"] == "GPT-4"] |
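
With pandas handled at module level, importing this file no longer requires pandas; the failure is deferred to the first pd.* call. Illustrative only, since the constructor arguments here are hypothetical:

    # Hypothetical arguments; see the class definition for the real ones.
    ds = BurstGPTDataset(dataset_path="burst_gpt.csv")
    ds.load_data()  # without pandas, pd.read_csv raises ImportError here,
                    # at call time rather than at import time
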
@@ -717,13 +726,6 @@ def __init__( |
717 | 726 |
718 | 727 | def load_data(self) -> None: |
719 | 728 | """Load data from HuggingFace datasets.""" |
720 | | - try: |
721 | | - from datasets import load_dataset |
722 | | - except ImportError as e: |
723 | | - raise ImportError( |
724 | | - "Hugging Face datasets library is required for this dataset. " |
725 | | - "Please install it using `pip install datasets`.") from e |
726 | | - |
727 | 729 | self.data = load_dataset( |
728 | 730 | self.dataset_path, |
729 | 731 | name=self.dataset_subset, |
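
For context, the same Hugging Face call pattern standalone, with an illustrative public dataset (openai/gsm8k is not used by this file):

    from datasets import load_dataset

    # name= selects a dataset configuration (what this file passes as
    # dataset_subset); split= picks a split within it.
    data = load_dataset("openai/gsm8k", name="main", split="train")
    print(data[0]["question"])
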
@@ -1147,13 +1149,6 @@ def sample( |
1147 | 1149 | output_len: Optional[int] = None, |
1148 | 1150 | **kwargs, |
1149 | 1151 | ) -> list: |
1150 | | - try: |
1151 | | - import librosa |
1152 | | - except ImportError as e: |
1153 | | - raise ImportError( |
1154 | | - "librosa is required for ASRDataset. Please install it " |
1155 | | - "using `pip install librosa`.") from e |
1156 | | - |
1157 | 1152 | output_len = (output_len |
1158 | 1153 | if output_len is not None else self.DEFAULT_OUTPUT_LEN) |
1159 | 1154 | prompt = ASRDataset.TRANSCRIPTION_PREAMBLE |
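
Per the requirement check being removed, sample() depends on librosa for audio decoding (its body is not shown in this hunk). For context, a typical librosa decode call with an illustrative path:

    import librosa

    # Decode an audio file and resample to 16 kHz, a common input rate
    # for ASR models; the path is illustrative.
    waveform, sample_rate = librosa.load("clip.wav", sr=16000)
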