examples/dynamo/aot_plugin.py (2 changes: 1 addition & 1 deletion)
@@ -153,7 +153,7 @@ def forward(self, X: torch.Tensor) -> torch.Tensor:
     )
     args = parser.parse_args()
 
-    my_model = MyModel().to("cuda")
+    my_model = MyModel().to("cuda").eval()
     m = torch.full((64, 64), 2, device="cuda", dtype=torch.float)
 
     assert my_model(X=m)[0][0] == 3.0
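Every hunk in this diff applies the same convention: move the module to its device first, cast the dtype second, and call `eval()` last. A minimal sketch of the pattern, using an illustrative module and shapes that are not taken from any of the examples:

```python
import torch

# Any nn.Module works the same way; Linear is just a stand-in.
model = torch.nn.Linear(64, 64)

# Device placement first, precision cast second, inference mode last.
# Each call returns the module itself, so the chain ends with a CUDA,
# half-precision module whose dropout/batch-norm layers are frozen
# before it reaches torch.export or torch_tensorrt.compile.
model = model.to("cuda").half().eval()

x = torch.randn(8, 64, device="cuda", dtype=torch.half)
with torch.no_grad():
    y = model(x)
```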
examples/dynamo/auto_generate_converters.py (2 changes: 1 addition & 1 deletion)
@@ -169,7 +169,7 @@ def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
         return res
 
 
-my_model = MyModel().to("cuda")
+my_model = MyModel().to("cuda").eval()
 m = torch.full((64, 64), 2, device="cuda", dtype=torch.float)
 n = torch.full((64, 64), 3, device="cuda", dtype=torch.float)
 
examples/dynamo/auto_generate_plugins.py (2 changes: 1 addition & 1 deletion)
@@ -139,7 +139,7 @@ def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
         return res
 
 
-my_model = MyModel().to("cuda")
+my_model = MyModel().to("cuda").eval()
 m = torch.randint(0, 5, (64, 64), device="cuda", dtype=torch.float)
 n = torch.randint(0, 5, (64, 64), device="cuda", dtype=torch.float)
 
examples/dynamo/converter_overloading.py (4 changes: 2 additions & 2 deletions)
@@ -34,7 +34,7 @@ def forward(self, x):
         return torch.nn.functional.gelu(x, approximate=self.mode)
 
 
-my_mod = GeLU(mode="tanh")
+my_mod = GeLU(mode="tanh").to("cuda").eval()
 ex_input = torch.randn(2, 5).to("cuda")
 
 
@@ -198,7 +198,7 @@ def get_op_count():
 #
 # Finally, we want to verify that in the case that the ``approximate`` argument is not set to ``tanh``, our custom converter is not used.
 
-my_mod_erf = GeLU(mode="none")
+my_mod_erf = GeLU(mode="none").to("cuda").eval()
 my_gelu_erf = torch_tensorrt.compile(
     my_mod_erf, arg_inputs=(ex_input,), min_block_size=1
 )
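The comment in the second hunk says the custom converter should be bypassed when ``approximate`` is not ``tanh``. One hedged way to sanity-check that (a plain output-equivalence assertion of my own, not the tutorial's ``get_op_count`` bookkeeping):

```python
# Sketch: with mode="none", compilation should fall back to the standard
# GELU converter, so the compiled outputs still match eager PyTorch.
with torch.no_grad():
    assert torch.allclose(
        my_gelu_erf(ex_input), my_mod_erf(ex_input), rtol=1e-3, atol=1e-3
    )
```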
examples/dynamo/cross_runtime_compilation_for_windows.py (2 changes: 1 addition & 1 deletion)
@@ -46,7 +46,7 @@
 
 args = PARSER.parse_args()
 torch.manual_seed(0)
-model = models.resnet18().eval().cuda()
+model = models.resnet18().cuda().eval()
 input = torch.rand((1, 3, 224, 224)).to("cuda")
 inputs = [input]
 
examples/dynamo/custom_kernel_plugins.py (2 changes: 1 addition & 1 deletion)
@@ -217,7 +217,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         return y
 
 
-my_model = MyModel((1, 1, 2, 0)).to("cuda")
+my_model = MyModel((1, 1, 2, 0)).to("cuda").eval()
 my_model(ex_input)
 
 ##############################################################################
examples/dynamo/engine_caching_example.py (2 changes: 1 addition & 1 deletion)
@@ -37,7 +37,7 @@
 np.random.seed(0)
 torch.manual_seed(0)
 
-model = models.resnet18(pretrained=True).eval().to("cuda")
+model = models.resnet18(pretrained=True).to("cuda").eval()
 enabled_precisions = {torch.float}
 min_block_size = 1
 use_python_runtime = False
examples/dynamo/llama2_flashinfer_rmsnorm.py (2 changes: 1 addition & 1 deletion)
@@ -220,7 +220,7 @@ def replace_rmsnorm(
 
 # 2. Initialize model (random weights)
 with torch.no_grad():
-    model = LlamaForCausalLM(config).eval().half()
+    model = LlamaForCausalLM(config).cuda().half().eval()
 
 # 3. Export with static shapes
 input_ids = torch.randint(0, 32000, (1, 64))  # Static [batch=1, seq=64]
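Step 3's static-shape export, condensed into a hedged sketch. The `torch.export` call mirrors the tutorial's numbered flow; placing `input_ids` on CUDA is my assumption, made to match the model's new `.cuda()` placement:

```python
import torch

# Static [batch=1, seq=64] token IDs on the same device as the model.
input_ids = torch.randint(0, 32000, (1, 64), device="cuda")

with torch.no_grad():
    exported_program = torch.export.export(model, (input_ids,))
```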
examples/dynamo/mutable_torchtrt_module_example.py (8 changes: 4 additions & 4 deletions)
@@ -37,7 +37,7 @@
     "immutable_weights": False,
 }
 
-model = models.resnet18(pretrained=True).eval().to("cuda")
+model = models.resnet18(pretrained=True).to("cuda").eval()
 mutable_module = torch_trt.MutableTorchTensorRTModule(model, **settings)
 # You can use the mutable module just like the original pytorch module. The compilation happens while you first call the mutable module.
 mutable_module(*inputs)
@@ -47,7 +47,7 @@
 
 # %%
 # Making changes to mutable module can trigger refit or re-compilation. For example, loading a different state_dict and setting new weight values will trigger refit, and adding a module to the model will trigger re-compilation.
-model2 = models.resnet18(pretrained=False).eval().to("cuda")
+model2 = models.resnet18(pretrained=False).to("cuda").eval()
 mutable_module.load_state_dict(model2.state_dict())
 
 
@@ -163,7 +163,7 @@ def forward(self, a, b, c={}):
 
 
 device = "cuda:0"
-model = Model().eval().to(device)
+model = Model().to(device).eval()
 inputs = (torch.rand(10, 3).to(device), torch.rand(3, 30).to(device))
 kwargs = {
     "c": {"a": torch.rand(10, 30).to(device), "b": torch.rand(10, 30).to(device)},
@@ -199,7 +199,7 @@ def forward(self, a, b, c={}):
 
 from torch_tensorrt.dynamo._defaults import TIMING_CACHE_PATH
 
-model = models.resnet18(pretrained=True).eval().to("cuda")
+model = models.resnet18(pretrained=True).to("cuda").eval()
 
 times = []
 start = torch.cuda.Event(enable_timing=True)
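The comment in the second hunk distinguishes refit from re-compilation. A condensed, hedged sketch of that life cycle, reusing the example's own names (the exact timing of the refit is my reading of the mutable-module tutorial):

```python
# Sketch of the mutable-module life cycle described in the comments above.
mutable_module(*inputs)                              # first call: full compilation
mutable_module.load_state_dict(model2.state_dict())  # same graph, new weights
mutable_module(*inputs)                              # next call: engine refit, not recompile
```

Structural changes, such as attaching a new submodule, alter the traced graph itself, which is why the comment says they fall back to a full re-compilation rather than a refit.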
examples/dynamo/pre_allocated_output_example.py (4 changes: 2 additions & 2 deletions)
@@ -67,9 +67,9 @@ def test_module_perf(model, *input):
 # Load bert model
 model = (
     BertModel.from_pretrained("bert-base-uncased", torchscript=True)
-    .eval()
-    .half()
     .to("cuda")
+    .half()
+    .eval()
 )
 # Define sample inputs
 inputs = [
examples/dynamo/refit_engine_example.py (5 changes: 2 additions & 3 deletions)
@@ -53,7 +53,7 @@
 #
 # In this case we are going to compile a ResNet18 model with randomly initialized weights and save it.
 
-model = models.resnet18(pretrained=False).eval().to("cuda")
+model = models.resnet18(pretrained=False).to("cuda").eval()
 exp_program = torch.export.export(model, tuple(inputs))
 enabled_precisions = {torch.float}
 workspace_size = 20 << 30
@@ -85,7 +85,7 @@
 # function is used to update the weights of the compiled module with the new weights.
 
 # Create and compile the updated model
-model2 = models.resnet18(pretrained=True).eval().to("cuda")
+model2 = models.resnet18(pretrained=True).to("cuda").eval()
 exp_program2 = torch.export.export(model2, tuple(inputs))
 
 
@@ -99,7 +99,6 @@
 )
 
 # Check the output
-model2.to("cuda")
 expected_outputs, refitted_outputs = exp_program2.module()(*inputs), new_trt_gm(*inputs)
 for expected_output, refitted_output in zip(expected_outputs, refitted_outputs):
     assert torch.allclose(
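The refit call itself is collapsed out of the diff between the second and third hunks. As a hedged sketch, the dynamo refit API looks roughly like this; the keyword names follow the refit tutorial as I recall it, and `trt_gm` is my assumed name for the previously compiled module:

```python
from torch_tensorrt.dynamo import refit_module_weights

# Swap the weights from exp_program2 into the already-compiled module,
# producing the new_trt_gm that the output check above compares against.
new_trt_gm = refit_module_weights(
    compiled_module=trt_gm,
    new_weight_module=exp_program2,
    arg_inputs=inputs,
)
```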
examples/dynamo/torch_compile_advanced_usage.py (4 changes: 2 additions & 2 deletions)
@@ -36,7 +36,7 @@ def forward(self, x: torch.Tensor, y: torch.Tensor):
 
 # Define sample float inputs and initialize model
 sample_inputs = [torch.rand((5, 7)).cuda(), torch.rand((5, 7)).cuda()]
-model = Model().eval().cuda()
+model = Model().cuda().eval()
 
 # %%
 
@@ -60,7 +60,7 @@ def forward(self, x: torch.Tensor, y: torch.Tensor):
     torch.rand((5, 7)).half().cuda(),
     torch.rand((5, 7)).half().cuda(),
 ]
-model_half = Model().eval().cuda()
+model_half = Model().cuda().eval()
 
 # %%
 
examples/dynamo/torch_compile_gpt2.py (4 changes: 2 additions & 2 deletions)
@@ -44,8 +44,8 @@
         use_cache=False,
         attn_implementation="eager",
     )
+    .to(DEVICE)
     .eval()
-    .cuda()
 )
 
 # %%
@@ -54,7 +54,7 @@
 # Tokenize a sample input prompt and get pytorch model outputs
 prompt = "I enjoy walking with my cute dog"
 model_inputs = tokenizer(prompt, return_tensors="pt")
-input_ids = model_inputs["input_ids"].cuda()
+input_ids = model_inputs["input_ids"].to(DEVICE)
 
 # %%
 # The ``generate()`` API of the ``AutoModelForCausalLM`` class is used for auto-regressive generation with greedy decoding.
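The greedy-decoding call the last comment refers to, as a hedged sketch; the `max_length` budget and the `pad_token_id` handling here are my assumptions, not the tutorial's exact settings:

```python
# Auto-regressive greedy generation with the eager PyTorch model.
pyt_gen_tokens = model.generate(
    input_ids,
    max_length=64,                        # assumed length budget
    use_cache=False,
    pad_token_id=tokenizer.eos_token_id,  # GPT-2 has no pad token by default
)
print(tokenizer.decode(pyt_gen_tokens[0], skip_special_tokens=True))
```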
examples/dynamo/torch_compile_resnet_example.py (12 changes: 6 additions & 6 deletions)
@@ -18,7 +18,7 @@
 # %%
 
 # Initialize model with half precision and sample inputs
-model = models.resnet18(pretrained=True).half().eval().to("cuda")
+model = models.resnet18(pretrained=True).to("cuda").half().eval()
 inputs = [torch.randn((1, 3, 224, 224)).to("cuda").half()]
 
 # %%
@@ -63,21 +63,21 @@
 # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 # Does not cause recompilation (same batch size as input)
-new_inputs = [torch.randn((1, 3, 224, 224)).half().to("cuda")]
+new_inputs = [torch.randn((1, 3, 224, 224)).to("cuda").half()]
 new_outputs = optimized_model(*new_inputs)
 
 # %%
 
 # Does cause recompilation (new batch size)
-new_batch_size_inputs = [torch.randn((8, 3, 224, 224)).half().to("cuda")]
+new_batch_size_inputs = [torch.randn((8, 3, 224, 224)).to("cuda").half()]
 new_batch_size_outputs = optimized_model(*new_batch_size_inputs)
 
 # %%
 # Avoid recompilation by specifying dynamic shapes before Torch-TRT compilation
 # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 # The following code illustrates the workflow using ir=torch_compile (which uses torch.compile under the hood)
-inputs_bs8 = torch.randn((8, 3, 224, 224)).half().to("cuda")
+inputs_bs8 = torch.randn((8, 3, 224, 224)).to("cuda").half()
 # This indicates dimension 0 of inputs_bs8 is dynamic whose range of values is [2, 16]
 torch._dynamo.mark_dynamic(inputs_bs8, 0, min=2, max=16)
 optimized_model = torch_tensorrt.compile(
@@ -92,7 +92,7 @@
 outputs_bs8 = optimized_model(inputs_bs8)
 
 # No recompilation happens for batch size = 12
-inputs_bs12 = torch.randn((12, 3, 224, 224)).half().to("cuda")
+inputs_bs12 = torch.randn((12, 3, 224, 224)).to("cuda").half()
 outputs_bs12 = optimized_model(inputs_bs12)
 
 # The following code illustrates the workflow using ir=dynamo (which uses torch.export APIs under the hood)
@@ -112,5 +112,5 @@
 trt_model = torch_tensorrt.compile(model, **compile_spec)
 
 # No recompilation happens for batch size = 12
-inputs_bs12 = torch.randn((12, 3, 224, 224)).half().to("cuda")
+inputs_bs12 = torch.randn((12, 3, 224, 224)).to("cuda").half()
 outputs_bs12 = trt_model(inputs_bs12)
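The `compile_spec` feeding the last hunk is collapsed out of the diff. A hedged reconstruction of what a dynamic-shape spec for the `ir=dynamo` path typically looks like; every value below is an assumption chosen to match the [2, 16] batch range used by `mark_dynamic` above, not the tutorial's verbatim spec:

```python
import torch
import torch_tensorrt

compile_spec = {
    "inputs": [
        torch_tensorrt.Input(
            min_shape=(2, 3, 224, 224),   # smallest batch the engine accepts
            opt_shape=(8, 3, 224, 224),   # shape TensorRT optimizes for
            max_shape=(16, 3, 224, 224),  # largest batch before recompilation
            dtype=torch.half,
        )
    ],
    "ir": "dynamo",
    "enabled_precisions": {torch.half},
}
trt_model = torch_tensorrt.compile(model, **compile_spec)
```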
examples/dynamo/torch_compile_transformers_example.py (2 changes: 1 addition & 1 deletion)
@@ -18,7 +18,7 @@
 # %%
 
 # Initialize model with float precision and sample inputs
-model = BertModel.from_pretrained("bert-base-uncased").eval().to("cuda")
+model = BertModel.from_pretrained("bert-base-uncased").to("cuda").eval()
 inputs = [
     torch.randint(0, 2, (1, 14), dtype=torch.int32).to("cuda"),
     torch.randint(0, 2, (1, 14), dtype=torch.int32).to("cuda"),
examples/dynamo/torch_export_cudagraphs.py (6 changes: 3 additions & 3 deletions)
@@ -25,7 +25,7 @@
 # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 # We begin by defining and initializing a model
-model = models.resnet18(pretrained=True).eval().to("cuda")
+model = models.resnet18(pretrained=True).cuda().eval()
 
 # Define sample inputs
 inputs = torch.randn((16, 3, 224, 224)).cuda()
@@ -101,8 +101,8 @@ def forward(self, x):
         return torch.relu((x + 2) * 0.5)
 
 
-model = SampleModel().eval().cuda()
-input = torch.randn((1, 3, 224, 224)).to("cuda")
+model = SampleModel().cuda().eval()
+input = torch.randn((1, 3, 224, 224)).cuda()
 
 # The 'torch_executed_ops' compiler option is used in this example to intentionally introduce graph breaks within the module.
 # Note: The Dynamo backend is required for the CUDA Graph context manager to handle modules in an Ahead-Of-Time (AOT) manner.
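The two comments at the end of that hunk describe the graph-break setup. A hedged sketch of how the pieces fit together; the specific op in `torch_executed_ops` and the context-manager usage are assumptions based on my reading of the cudagraphs tutorial:

```python
import torch_tensorrt

# Force one op back to PyTorch to intentionally split the TRT graph.
opt_with_graph_break = torch_tensorrt.compile(
    model,
    ir="dynamo",
    inputs=[input],
    min_block_size=1,
    torch_executed_ops={"torch.ops.aten.mul.Tensor"},
)

# The CUDA Graph context manager wraps the mixed TRT/PyTorch module AOT.
with torch_tensorrt.runtime.enable_cudagraphs(opt_with_graph_break) as cg_module:
    out = cg_module(input)
```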
examples/dynamo/torch_export_sam2.py (4 changes: 2 additions & 2 deletions)
@@ -110,8 +110,8 @@ def forward(self, image, point_coords, point_labels):
 # Initialize the ``SAM2FullModel`` with the pretrained weights. Since we already initialized
 # ``SAM2ImagePredictor``, we can directly use the model from it (``predictor.model``). We cast the model
 # to FP16 precision for faster performance.
-encoder = predictor.model.eval().cuda()
-sam_model = SAM2FullModel(encoder.half()).eval().cuda()
+encoder = predictor.model.cuda().eval()
+sam_model = SAM2FullModel(encoder.half()).cuda().eval()
 
 # %%
 # Load a sample image provided in the repository.
examples/dynamo/vgg16_ptq.py (2 changes: 1 addition & 1 deletion)
@@ -120,7 +120,7 @@ def vgg16(num_classes=1000, init_weights=False):
 args = PARSER.parse_args()
 
 model = vgg16(num_classes=10, init_weights=False)
-model = model.cuda()
+model = model.cuda().eval()
 
 # %%
 # Load the pre-trained model weights
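The weight-loading step announced by the last comment is collapsed out of the diff. A hedged sketch of the usual pattern; the `--ckpt` flag and the `model_state_dict` key are assumptions about this script, not confirmed by the hunk:

```python
import torch

# Load the pre-trained checkpoint produced by the training script.
ckpt = torch.load(args.ckpt, map_location="cuda")
model.load_state_dict(ckpt["model_state_dict"])
```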