@@ -29,7 +29,7 @@
 
 import QEfficient
 from QEfficient.base.modeling_qeff import QEFFBaseModel
-from QEfficient.base.onnx_transforms import FP16ClipTransform, SplitTensorsTransform
+from QEfficient.base.onnx_transforms import FP16ClipTransform
 from QEfficient.base.pytorch_transforms import SplitGateUpWeightsTransform
 from QEfficient.generation.cloud_infer import QAICInferenceSession
 from QEfficient.generation.text_generation_inference import (
@@ -229,8 +229,7 @@ class QEFFAutoModel(QEFFTransformersBase):
 
     _hf_auto_class = AutoModel
     _pytorch_transforms = [CustomOpsTransform, AwqToMatmulNbitsTransform, GPTQToMatmulNbitsTransform]
-    _onnx_transforms = [FP16ClipTransform, SplitTensorsTransform]
-    _proxy_only_onnx_transforms = (SplitTensorsTransform,)
+    _onnx_transforms = [FP16ClipTransform]
 
     def __init__(self, model: nn.Module, pooling=None, **kwargs):
         """
@@ -618,7 +617,7 @@ class QEFFAutoModelForSequenceClassification(QEFFTransformersBase):
 
     _hf_auto_class = AutoModelForSequenceClassification
     _pytorch_transforms = [CustomOpsTransform, TextClassificationTransform]
-    _onnx_transforms = [FP16ClipTransform, SplitTensorsTransform]
+    _onnx_transforms = []
 
     def __init__(self, model: nn.Module, **kwargs):
         """
@@ -860,7 +859,7 @@ class QEffVisionEncoderForTextImageToTextModel(QEFFBaseModel):
         KVCacheTransform,
         KVCacheExternalModuleMapperTransform,
     ]
-    _onnx_transforms = [FP16ClipTransform, SplitTensorsTransform]
+    _onnx_transforms = []
 
     def __init__(self, model: nn.modules, **kwargs):
         """
@@ -999,7 +998,7 @@ class QEffCausalLMForTextImageToTextModel(QEFFBaseModel):
         VlmKVOffloadTransform,
         SplitGateUpWeightsTransform,
     ]
-    _onnx_transforms = [FP16ClipTransform, SplitTensorsTransform]
+    _onnx_transforms = []
 
     def __init__(self, model, qaic_config: Optional[dict] = None, **kwargs):
         """
@@ -1875,7 +1874,7 @@ class _QEFFAutoModelForImageTextToTextSingleQPC(QEFFTransformersBase, Multimodal
         VlmNoKVOffloadTransform,
         SplitGateUpWeightsTransform,
     ]
-    _onnx_transforms = [FP16ClipTransform, SplitTensorsTransform]
+    _onnx_transforms = []
 
     def __init__(
         self,
@@ -2627,7 +2626,7 @@ class QEFFAutoModelForCausalLM(QEFFBaseModel):
         KVCacheExternalModuleMapperTransform,
     ]
 
-    _onnx_transforms = [FP16ClipTransform, SplitTensorsTransform]
+    _onnx_transforms = []
 
     def prefill(
         self,
@@ -3576,7 +3575,7 @@ class QEFFAutoModelForSpeechSeq2Seq(QEFFTransformersBase, MultimodalUtilityMixin
 
     _hf_auto_class = AutoModelForSpeechSeq2Seq
     _pytorch_transforms = [CustomOpsTransform, AwqToMatmulNbitsTransform, GPTQToMatmulNbitsTransform, KVCacheTransform]
-    _onnx_transforms = [FP16ClipTransform, SplitTensorsTransform]
+    _onnx_transforms = []
 
     def __init__(self, model: nn.Module, **kwargs):
         """
@@ -3935,7 +3934,7 @@ class QEFFAutoModelForCTC(QEFFTransformersBase):
 
     _hf_auto_class = AutoModelForCTC
     _pytorch_transforms = [CustomOpsTransform, AwqToMatmulNbitsTransform, GPTQToMatmulNbitsTransform]
-    _onnx_transforms = [FP16ClipTransform, SplitTensorsTransform]
+    _onnx_transforms = []
 
     def __init__(self, model: nn.Module, **kwargs):
         super().__init__(model, **kwargs)
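# ---------------------------------------------------------------------------
# Context for the `_onnx_transforms` lists in the hunks above: each list names
# graph passes run on the exported ONNX model before compilation. As a rough
# illustration of what such a pass does, below is a standalone sketch of fp16
# clipping. It is an assumption-laden re-implementation of the *idea* behind
# FP16ClipTransform, written against the public `onnx`/`numpy` APIs only; the
# function name is hypothetical and this is NOT QEfficient's actual code.
import numpy as np
import onnx
from onnx import numpy_helper

# Largest finite value representable in IEEE fp16, widened to an fp32 scalar.
FP16_MAX = np.finfo(np.float16).max.astype(np.float32)

def clip_initializers_to_fp16_range(model: onnx.ModelProto) -> onnx.ModelProto:
    """Clamp every fp32 initializer into [-FP16_MAX, FP16_MAX] so a later
    fp16 conversion cannot overflow to inf."""
    for init in model.graph.initializer:
        if init.data_type == onnx.TensorProto.FLOAT:
            arr = numpy_helper.to_array(init)
            clipped = np.clip(arr, -FP16_MAX, FP16_MAX)
            init.CopyFrom(numpy_helper.from_array(clipped, init.name))
    return model

# Hypothetical usage on an exported model file:
#   model = onnx.load("model.onnx")
#   onnx.save(clip_initializers_to_fp16_range(model), "model_clipped.onnx")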