diff --git a/.github/workflows/build-test-linux.yml b/.github/workflows/build-test-linux.yml
index d0fabf9993..a8e90a3222 100644
--- a/.github/workflows/build-test-linux.yml
+++ b/.github/workflows/build-test-linux.yml
@@ -138,6 +138,7 @@ jobs:
           pushd .
           cd tests/py
           python -m pip install -r requirements.txt
+          python -m pip install nvidia-modelopt[all] --extra-index-url https://pypi.nvidia.com
           cd dynamo
           python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 4 conversion/
           python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml automatic_plugin/test_automatic_plugin.py
@@ -172,6 +173,7 @@ jobs:
           pushd .
           cd tests/py
           python -m pip install -r requirements.txt
+          python -m pip install nvidia-modelopt[all] --extra-index-url https://pypi.nvidia.com
           cd dynamo
           python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dyn_models_export.xml --ir dynamo models/
           popd
diff --git a/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py b/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py
index a3444c025d..da5f3b36c9 100644
--- a/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py
+++ b/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py
@@ -893,7 +893,7 @@ def get_attr(self, target: str, args: Any, kwargs: Any) -> np.ndarray:
         else:
             constant_tensor = frozen_attr
 
-            return to_torch(constant_tensor)
+        return to_torch(constant_tensor)
 
     def call_method(self, target: str, args: Any, kwargs: Any) -> Any:
         assert isinstance(target, str)
diff --git a/py/torch_tensorrt/dynamo/conversion/converter_utils.py b/py/torch_tensorrt/dynamo/conversion/converter_utils.py
index 0808db2ed8..685f40b254 100644
--- a/py/torch_tensorrt/dynamo/conversion/converter_utils.py
+++ b/py/torch_tensorrt/dynamo/conversion/converter_utils.py
@@ -590,42 +590,45 @@ def to_numpy(
     Returns:
         A Numpy array or None, if the input was None.
     """
-    output = None
+    with unset_fake_temporarily():
+        output = None
 
-    if value is None or isinstance(value, np.ndarray):
-        output = value
+        if value is None or isinstance(value, np.ndarray):
+            output = value
 
-    elif isinstance(value, torch.Tensor):
-        if value.is_quantized:
-            value = value.dequantize()
-        elif value.dtype == torch.bfloat16:
-            # TODO: Remove when numpy has a BF16 type
-            _LOGGER.warning(
-                "Requested a conversion of bfloat16 tensor from torch to numpy which isn't supported. Casting this tensor to FP32 precision currently. Please use to_torch() API for better data representation",
-            )
-            value = value.to(torch.float)
-
-        output = value.cpu().detach().contiguous().numpy()
+        elif isinstance(value, torch.Tensor):
+            if value.is_quantized:
+                value = value.dequantize()
+            elif value.dtype == torch.bfloat16:
+                # TODO: Remove when numpy has a BF16 type
+                _LOGGER.warning(
+                    "Requested a conversion of bfloat16 tensor from torch to numpy which isn't supported. Casting this tensor to FP32 precision currently. Please use to_torch() API for better data representation",
+                )
+                value = value.to(torch.float)
 
-    elif isinstance(value, int):
-        output = np.array([value], dtype=np.int32)
+            output = value.cpu().detach().contiguous().numpy()
 
-    elif isinstance(value, float):
-        output = np.array([value], dtype=np.float32)
+        elif isinstance(value, int):
+            output = np.array([value], dtype=np.int32)
 
-    elif isinstance(value, bool):
-        output = np.array([value], dtype=np.bool_)
+        elif isinstance(value, float):
+            output = np.array([value], dtype=np.float32)
 
-    if isinstance(output, np.ndarray) or output is None:
-        return (
-            output
-            if (dtype is None or output is None)
-            else output.astype(_enums.dtype._from(dtype).to(np.dtype, use_default=True))
-        )
-    else:
-        raise AssertionError(
-            f"to_numpy can only be called on None, bool, int, float, np.ndarray, or torch.Tensor, got: {value}"
-        )
+        elif isinstance(value, bool):
+            output = np.array([value], dtype=np.bool_)
+
+        if isinstance(output, np.ndarray) or output is None:
+            return (
+                output
+                if (dtype is None or output is None)
+                else output.astype(
+                    _enums.dtype._from(dtype).to(np.dtype, use_default=True)
+                )
+            )
+        else:
+            raise AssertionError(
+                f"to_numpy can only be called on None, bool, int, float, np.ndarray, or torch.Tensor, got: {value}"
+            )
 
 
 def to_torch(
diff --git a/tests/py/dynamo/backend/test_backend_compiler.py b/tests/py/dynamo/backend/test_backend_compiler.py
index 4c65800f05..6369d3805c 100644
--- a/tests/py/dynamo/backend/test_backend_compiler.py
+++ b/tests/py/dynamo/backend/test_backend_compiler.py
@@ -2,11 +2,10 @@
 from copy import deepcopy
 
 import torch
+import torch_tensorrt
 from torch.testing._internal.common_utils import TestCase, run_tests
 from torch_tensorrt.dynamo.partitioning import fast_partition
 
-import torch_tensorrt
-
 from ..testing_utilities import DECIMALS_OF_AGREEMENT, lower_graph_testing
 
 
@@ -51,7 +50,6 @@ def forward(self, x, y):
             pass_through_build_failures=True,
             torch_executed_ops={"torch.ops.aten.add.Tensor"},
             use_python_runtime=False,
-            debug=True,
         )
         optimized_model_results = optimized_model(*inputs).detach().cpu()
         torch_model_results = fx_graph(*inputs).detach().cpu()
@@ -132,7 +130,6 @@ def forward(self, x, y):
             pass_through_build_failures=True,
             torch_executed_ops={"torch.ops.aten.add.Tensor"},
             use_python_runtime=False,
-            debug=True,
         )
         optimized_model_results = optimized_model(*inputs).detach().cpu()
         torch_model_results = model(*inputs).detach().cpu()
@@ -177,7 +174,6 @@ def forward(self, x, y):
             optimization_level=4,
             version_compatible=True,
             max_aux_streams=5,
-            debug=True,
         )
         optimized_model_results = optimized_model(*inputs).detach().cpu()
         torch_model_results = fx_graph(*inputs).detach().cpu()
@@ -225,7 +221,6 @@ def forward(self, x, y):
             min_block_size=1,
             pass_through_build_failures=True,
             truncate_double=True,
-            debug=True,
         )
         optimized_model_results = optimized_model(*inputs).detach().cpu()
         torch_model_results = fx_graph(*inputs).detach().cpu()
@@ -298,7 +293,6 @@ def forward(self, x, y):
             min_block_size=1,
             pass_through_build_failures=True,
             truncate_double=False,
-            debug=True,
             torch_executed_ops={"torch.ops.aten.add.Tensor"},
         )
         optimized_model_results = optimized_model(*inputs).detach().cpu()
diff --git a/tests/py/dynamo/conversion/harness.py b/tests/py/dynamo/conversion/harness.py
index 6ff45507a0..aa22a74fc0 100644
--- a/tests/py/dynamo/conversion/harness.py
+++ b/tests/py/dynamo/conversion/harness.py
@@ -415,7 +415,6 @@ def run_test(
         compilation_settings = CompilationSettings(
             enabled_precisions={dtype._from(precision)},
             truncate_double=True,
-            debug=True,
             immutable_weights=immutable_weights,
         )
 
@@ -507,7 +506,6 @@ def run_test_compare_tensor_attributes_only(
         compilation_settings = CompilationSettings(
             enabled_precisions={dtype._from(precision)},
             truncate_double=True,
-            debug=True,
             immutable_weights=immutable_weights,
         )
 
diff --git a/tests/py/dynamo/models/test_dtype_support.py b/tests/py/dynamo/models/test_dtype_support.py
index 146f7fdb7d..37b40574a1 100644
--- a/tests/py/dynamo/models/test_dtype_support.py
+++ b/tests/py/dynamo/models/test_dtype_support.py
@@ -297,7 +297,6 @@ def forward(self, x):
             ir="torch_compile",
             inputs=inputs,
             enabled_precisions={torch.bfloat16},
-            debug=True,
             min_block_size=1,
             device=device,
             cache_built_engines=False,
diff --git a/tests/py/dynamo/models/test_models_export.py b/tests/py/dynamo/models/test_models_export.py
index 19fdeaa9ab..0c28b23bba 100644
--- a/tests/py/dynamo/models/test_models_export.py
+++ b/tests/py/dynamo/models/test_models_export.py
@@ -254,7 +254,6 @@ def calibrate_loop(model):
 
 @unittest.skipIf(
     platform.system() != "Linux"
-    or torch.cuda.get_device_capability() < (8, 9)
     or not importlib.util.find_spec("modelopt")
     or Version(metadata.version("nvidia-modelopt")) < Version("0.17.0"),
     "modelopt 0.17.0 or later is required, Int8 quantization is supported in modelopt since 0.17.0 or later for linux",
@@ -290,7 +289,7 @@ def calibrate_loop(model):
 
     with torch.no_grad():
         with export_torch_mode():
-            exp_program = torch.export.export(model, (input_tensor,))
+            exp_program = torch.export.export(model, (input_tensor,), strict=False)
             trt_model = torchtrt.dynamo.compile(
                 exp_program,
                 inputs=[input_tensor],
diff --git a/tests/py/requirements.txt b/tests/py/requirements.txt
index 011ed01e35..f247fa5696 100644
--- a/tests/py/requirements.txt
+++ b/tests/py/requirements.txt
@@ -10,5 +10,3 @@ pyyaml
 timm>=1.0.3
 flashinfer-python; python_version < "3.13"
 transformers==4.49.0
-nvidia-modelopt[deploy,hf,torch]~=0.17.0; python_version < "3.13"
---extra-index-url https://pypi.nvidia.com
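
Note on the converter_utils.py change: running the body of to_numpy() under
unset_fake_temporarily() lets the converter materialize real constant data even
while a FakeTensorMode is active during tracing, where .numpy() on a fake
tensor would otherwise fail for lack of real storage. A minimal standalone
sketch of the guard (not part of the patch; the FakeTensorMode setup here is
illustrative, not how torch_tensorrt enters fake mode):

    import torch
    from torch._subclasses.fake_tensor import FakeTensorMode, unset_fake_temporarily

    weight = torch.arange(4, dtype=torch.float32)

    with FakeTensorMode(allow_non_fake_inputs=True):
        # Inside fake mode, ops on `weight` are dispatched to FakeTensors,
        # which have no real storage and cannot be converted to numpy.
        with unset_fake_temporarily():
            # Dispatch temporarily reverts to real tensors here, so the same
            # conversion to_numpy() performs for TensorRT weights succeeds.
            arr = weight.cpu().detach().contiguous().numpy()

    print(arr)  # [0. 1. 2. 3.]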
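Note on the test_models_export.py change: passing strict=False to
torch.export.export() selects the non-strict tracer instead of the default
TorchDynamo-backed strict mode, which is generally more permissive about
Python-level control flow in the ModelOpt-quantized module. A sketch of the
call shape (ToyModel and input_tensor are placeholders, not the test's real
quantized model):

    import torch

    class ToyModel(torch.nn.Module):
        def forward(self, x: torch.Tensor) -> torch.Tensor:
            return torch.relu(x)

    model = ToyModel().eval()
    input_tensor = torch.randn(2, 3)

    # strict=False: capture the graph with the non-strict exporter rather
    # than routing graph capture through TorchDynamo.
    exp_program = torch.export.export(model, (input_tensor,), strict=False)
    print(exp_program)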