Merged
2 changes: 2 additions & 0 deletions .github/workflows/build-test-linux.yml
@@ -138,6 +138,7 @@ jobs:
pushd .
cd tests/py
python -m pip install -r requirements.txt
+python -m pip install nvidia-modelopt[all] --extra-index-url https://pypi.nvidia.com
cd dynamo
python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 4 conversion/
python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml automatic_plugin/test_automatic_plugin.py
@@ -172,6 +173,7 @@ jobs:
pushd .
cd tests/py
python -m pip install -r requirements.txt
+python -m pip install nvidia-modelopt[all] --extra-index-url https://pypi.nvidia.com
cd dynamo
python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dyn_models_export.xml --ir dynamo models/
popd
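The quantization tests gate themselves at runtime rather than through requirements.txt (see the skipIf change in tests/py/dynamo/models/test_models_export.py below), which is why the workflow now installs modelopt explicitly. A minimal sketch of that guard, mirroring the version check the tests already use; the helper name is ours, not the repo's:

import importlib.util
from importlib import metadata

from packaging.version import Version


def modelopt_available(min_version: str = "0.17.0") -> bool:
    """Mirror of the tests' skip condition: is nvidia-modelopt >= min_version importable?"""
    if importlib.util.find_spec("modelopt") is None:
        return False
    return Version(metadata.version("nvidia-modelopt")) >= Version(min_version)


print(modelopt_available())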
2 changes: 1 addition & 1 deletion py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py
@@ -893,7 +893,7 @@ def get_attr(self, target: str, args: Any, kwargs: Any) -> np.ndarray:
        else:
            constant_tensor = frozen_attr

-            return to_torch(constant_tensor)
+        return to_torch(constant_tensor)

def call_method(self, target: str, args: Any, kwargs: Any) -> Any:
assert isinstance(target, str)
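For context on the one-line dedent above: the return previously sat inside the else branch, so the other branch of the conditional fell through and get_attr implicitly returned None. A hedged paraphrase of the corrected control flow — the first branch's condition is assumed from the surrounding code, since this hunk does not show it:

import torch
from torch_tensorrt.dynamo.conversion.converter_utils import to_torch


def get_attr_sketch(frozen_attr):
    # Paraphrased: unwrap nn.Parameter to its data, pass other tensors through.
    if isinstance(frozen_attr, torch.nn.Parameter):
        constant_tensor = frozen_attr.data
    else:
        constant_tensor = frozen_attr
        # Before the fix, `return to_torch(constant_tensor)` lived here, so the
        # branch above returned None implicitly.
    return to_torch(constant_tensor)  # after the fix: both branches return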
63 changes: 33 additions & 30 deletions py/torch_tensorrt/dynamo/conversion/converter_utils.py
@@ -590,42 +590,45 @@ def to_numpy(
Returns:
A Numpy array or None, if the input was None.
"""
-    output = None
+    with unset_fake_temporarily():
+        output = None

-    if value is None or isinstance(value, np.ndarray):
-        output = value
+        if value is None or isinstance(value, np.ndarray):
+            output = value

-    elif isinstance(value, torch.Tensor):
-        if value.is_quantized:
-            value = value.dequantize()
-        elif value.dtype == torch.bfloat16:
-            # TODO: Remove when numpy has a BF16 type
-            _LOGGER.warning(
-                "Requested a conversion of bfloat16 tensor from torch to numpy which isn't supported. Casting this tensor to FP32 precision currently. Please use to_torch() API for better data representation",
-            )
-            value = value.to(torch.float)
-
-        output = value.cpu().detach().contiguous().numpy()
+        elif isinstance(value, torch.Tensor):
+            if value.is_quantized:
+                value = value.dequantize()
+            elif value.dtype == torch.bfloat16:
+                # TODO: Remove when numpy has a BF16 type
+                _LOGGER.warning(
+                    "Requested a conversion of bfloat16 tensor from torch to numpy which isn't supported. Casting this tensor to FP32 precision currently. Please use to_torch() API for better data representation",
+                )
+                value = value.to(torch.float)
+
+            output = value.cpu().detach().contiguous().numpy()

-    elif isinstance(value, int):
-        output = np.array([value], dtype=np.int32)
+        elif isinstance(value, int):
+            output = np.array([value], dtype=np.int32)

-    elif isinstance(value, float):
-        output = np.array([value], dtype=np.float32)
+        elif isinstance(value, float):
+            output = np.array([value], dtype=np.float32)

-    elif isinstance(value, bool):
-        output = np.array([value], dtype=np.bool_)
+        elif isinstance(value, bool):
+            output = np.array([value], dtype=np.bool_)

-    if isinstance(output, np.ndarray) or output is None:
-        return (
-            output
-            if (dtype is None or output is None)
-            else output.astype(_enums.dtype._from(dtype).to(np.dtype, use_default=True))
-        )
-    else:
-        raise AssertionError(
-            f"to_numpy can only be called on None, bool, int, float, np.ndarray, or torch.Tensor, got: {value}"
-        )
+        if isinstance(output, np.ndarray) or output is None:
+            return (
+                output
+                if (dtype is None or output is None)
+                else output.astype(
+                    _enums.dtype._from(dtype).to(np.dtype, use_default=True)
+                )
+            )
+        else:
+            raise AssertionError(
+                f"to_numpy can only be called on None, bool, int, float, np.ndarray, or torch.Tensor, got: {value}"
+            )


def to_torch(
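For intuition on why to_numpy's body is now wrapped in unset_fake_temporarily(): under FakeTensorMode, tensors carry shape and dtype metadata but no backing storage, so the .cpu().detach().contiguous().numpy() path has no data to copy out; suspending the fake mode restores ordinary tensor behavior. A minimal standalone sketch, assuming the torch.fx.experimental.proxy_tensor import path (the diff shows only the call, not the import):

import torch
from torch._subclasses.fake_tensor import FakeTensorMode
from torch.fx.experimental.proxy_tensor import unset_fake_temporarily

with FakeTensorMode():
    fake = torch.ones(2, 2)  # a FakeTensor: metadata only, no backing storage
    # fake.numpy() would raise here; there is no real data to copy out.
    with unset_fake_temporarily():
        real = torch.ones(2, 2)  # fake mode suspended: an ordinary tensor
        print(real.cpu().detach().contiguous().numpy().shape)  # (2, 2)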
8 changes: 1 addition & 7 deletions tests/py/dynamo/backend/test_backend_compiler.py
@@ -2,11 +2,10 @@
from copy import deepcopy

import torch
+import torch_tensorrt
from torch.testing._internal.common_utils import TestCase, run_tests
from torch_tensorrt.dynamo.partitioning import fast_partition

-import torch_tensorrt
-
from ..testing_utilities import DECIMALS_OF_AGREEMENT, lower_graph_testing


@@ -51,7 +50,6 @@ def forward(self, x, y):
pass_through_build_failures=True,
torch_executed_ops={"torch.ops.aten.add.Tensor"},
use_python_runtime=False,
-debug=True,
)
optimized_model_results = optimized_model(*inputs).detach().cpu()
torch_model_results = fx_graph(*inputs).detach().cpu()
@@ -132,7 +130,6 @@ def forward(self, x, y):
pass_through_build_failures=True,
torch_executed_ops={"torch.ops.aten.add.Tensor"},
use_python_runtime=False,
-debug=True,
)
optimized_model_results = optimized_model(*inputs).detach().cpu()
torch_model_results = model(*inputs).detach().cpu()
@@ -177,7 +174,6 @@ def forward(self, x, y):
optimization_level=4,
version_compatible=True,
max_aux_streams=5,
-debug=True,
)
optimized_model_results = optimized_model(*inputs).detach().cpu()
torch_model_results = fx_graph(*inputs).detach().cpu()
@@ -225,7 +221,6 @@ def forward(self, x, y):
min_block_size=1,
pass_through_build_failures=True,
truncate_double=True,
-debug=True,
)
optimized_model_results = optimized_model(*inputs).detach().cpu()
torch_model_results = fx_graph(*inputs).detach().cpu()
@@ -298,7 +293,6 @@ def forward(self, x, y):
min_block_size=1,
pass_through_build_failures=True,
truncate_double=False,
-debug=True,
torch_executed_ops={"torch.ops.aten.add.Tensor"},
)
optimized_model_results = optimized_model(*inputs).detach().cpu()
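The debug=True compile setting is dropped throughout these tests. If equivalent verbosity is still wanted when running them locally, one option is plain standard-library logging; a sketch under the assumption that the package's loggers live under the torch_tensorrt namespace, which this diff does not show:

import logging

import torch_tensorrt  # noqa: F401  # importing registers the package loggers

# Per-process verbosity instead of a per-compile debug=True flag (assumed
# logger name; adjust if the package uses a different namespace).
logging.basicConfig(level=logging.DEBUG)
logging.getLogger("torch_tensorrt").setLevel(logging.DEBUG)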
2 changes: 0 additions & 2 deletions tests/py/dynamo/conversion/harness.py
@@ -415,7 +415,6 @@ def run_test(
compilation_settings = CompilationSettings(
enabled_precisions={dtype._from(precision)},
truncate_double=True,
-debug=True,
immutable_weights=immutable_weights,
)

@@ -507,7 +506,6 @@ def run_test_compare_tensor_attributes_only(
compilation_settings = CompilationSettings(
enabled_precisions={dtype._from(precision)},
truncate_double=True,
-debug=True,
immutable_weights=immutable_weights,
)

1 change: 0 additions & 1 deletion tests/py/dynamo/models/test_dtype_support.py
@@ -297,7 +297,6 @@ def forward(self, x):
ir="torch_compile",
inputs=inputs,
enabled_precisions={torch.bfloat16},
-debug=True,
min_block_size=1,
device=device,
cache_built_engines=False,
3 changes: 1 addition & 2 deletions tests/py/dynamo/models/test_models_export.py
@@ -254,7 +254,6 @@ def calibrate_loop(model):

@unittest.skipIf(
platform.system() != "Linux"
-or torch.cuda.get_device_capability() < (8, 9)
or not importlib.util.find_spec("modelopt")
or Version(metadata.version("nvidia-modelopt")) < Version("0.17.0"),
"modelopt 0.17.0 or later is required, Int8 quantization is supported in modelopt since 0.17.0 or later for linux",
@@ -290,7 +289,7 @@ def calibrate_loop(model):

with torch.no_grad():
with export_torch_mode():
-exp_program = torch.export.export(model, (input_tensor,))
+exp_program = torch.export.export(model, (input_tensor,), strict=False)
trt_model = torchtrt.dynamo.compile(
exp_program,
inputs=[input_tensor],
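For reference, a standalone sketch of the non-strict export path the test now takes; the toy module is illustrative, not the test's quantized model. With strict=False, torch.export traces through the Python interpreter rather than TorchDynamo, which is generally more permissive about model code — presumably what the quantized model here needs:

import torch


class Toy(torch.nn.Module):  # illustrative stand-in for the test's model
    def forward(self, x):
        return torch.relu(x) + 1


model = Toy().eval()
input_tensor = torch.randn(2, 3)

# Non-strict export: traced via the Python interpreter instead of TorchDynamo.
exp_program = torch.export.export(model, (input_tensor,), strict=False)
print(exp_program)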
2 changes: 0 additions & 2 deletions tests/py/requirements.txt
@@ -10,5 +10,3 @@ pyyaml
timm>=1.0.3
flashinfer-python; python_version < "3.13"
transformers==4.49.0
-nvidia-modelopt[deploy,hf,torch]~=0.17.0; python_version < "3.13"
---extra-index-url https://pypi.nvidia.com