@@ -17,6 +17,7 @@
 """Wrapper around `transformers` models"""
 from collections.abc import Iterable, Mapping
 from contextlib import contextmanager
+from pathlib import Path
 from typing import Literal, Optional, Union

 import regex as re
@@ -60,6 +61,21 @@
 logger = init_logger(__name__)


+def get_feature_request_tip(
+    model: str,
+    trust_remote_code: bool,
+) -> str:
+    hf_url = f"a discussion at https://huggingface.co/{model}/discussions/new"
+    gh_url = "an issue at https://github.com/huggingface/transformers/issues/new/choose"
+    url = hf_url if trust_remote_code else gh_url
+    prefix = f"Please open {url} to request support for this feature. "
+    if Path(model).exists():
+        prefix = ""
+    doc_url = "https://docs.vllm.ai/en/latest/models/supported_models.html#writing-custom-models"
+    tip = f"See {doc_url} for instructions on how to add support yourself."
+    return f"{prefix}{tip}"
+
+
 def vllm_flash_attention_forward(
     # Transformers args
     module: torch.nn.Module,
@@ -480,8 +496,11 @@ def pipeline_parallel(self):
             return

         if not self.model.supports_pp_plan:
+            tip = get_feature_request_tip(self.model_config.model,
+                                          self.model_config.trust_remote_code)
             raise ValueError(
-                f"{type(self.model)} does not support pipeline parallel yet!")
+                f"{type(self.model)} does not support pipeline parallel. {tip}"
+            )

         module_lists = []
         module_list_idx = None
@@ -535,8 +554,10 @@ def tensor_parallel(self):
         models_with_tp_plan = filter(supports_tp_plan, pretrained_models)

         if not any(models_with_tp_plan) and self.tp_size > 1:
+            tip = get_feature_request_tip(self.model_config.model,
+                                          self.model_config.trust_remote_code)
             raise ValueError(
-                f"{type(self.model)} does not support tensor parallel yet!")
+                f"{type(self.model)} does not support tensor parallel. {tip}")

         def _tensor_parallel(module: nn.Module,
                              prefix: str = "",
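
For reference, a minimal standalone sketch of the messages the new helper produces. The body of get_feature_request_tip is reproduced from the diff above; the model id "org/custom-model" and the temporary directory are made-up placeholders used only for illustration.

from pathlib import Path
import tempfile


def get_feature_request_tip(
    model: str,
    trust_remote_code: bool,
) -> str:
    hf_url = f"a discussion at https://huggingface.co/{model}/discussions/new"
    gh_url = "an issue at https://github.com/huggingface/transformers/issues/new/choose"
    url = hf_url if trust_remote_code else gh_url
    prefix = f"Please open {url} to request support for this feature. "
    if Path(model).exists():
        prefix = ""
    doc_url = "https://docs.vllm.ai/en/latest/models/supported_models.html#writing-custom-models"
    tip = f"See {doc_url} for instructions on how to add support yourself."
    return f"{prefix}{tip}"


if __name__ == "__main__":
    # Remote-code model: the tip asks for a discussion on the model repo itself.
    print(get_feature_request_tip("org/custom-model", trust_remote_code=True))

    # Stock Transformers architecture: the tip points at the transformers issue tracker.
    print(get_feature_request_tip("org/custom-model", trust_remote_code=False))

    # Local checkpoint: Path(model).exists() is True, so the "Please open ..."
    # prefix is dropped and only the self-serve docs link remains.
    with tempfile.TemporaryDirectory() as local_dir:
        print(get_feature_request_tip(local_dir, trust_remote_code=True))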
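
The three branches in the sketch mirror the helper's control flow: with trust_remote_code the request is directed to the model repository's own discussion page (presumably because the modeling code lives in that repository), otherwise to the transformers issue tracker, and when the model argument is an existing local path the request prefix is dropped entirely, leaving only the link to vLLM's guide on writing custom models.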