Commit 17afde4

fix: set example models to eval mode and follow the convention (#3770)
1 parent e6b0a88 commit 17afde4

22 files changed: +210 / -170 lines
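
The "convention" in the title is the same pattern applied across every example below: move the model to the target device, switch it to eval mode, and wrap inference in torch.no_grad(). A minimal sketch of that pattern (the Linear module and shapes are placeholders, not taken from this diff):

import torch

# Hypothetical stand-in for the example models touched by this commit.
model = torch.nn.Linear(8, 8).to("cuda").eval()  # device first, then eval()

x = torch.randn(4, 8, device="cuda")

# no_grad() keeps autograd from recording the forward pass during inference,
# which is what the examples below now do around their compiled models.
with torch.no_grad():
    out = model(x)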

examples/dynamo/aot_plugin.py

Lines changed: 5 additions & 4 deletions
@@ -153,7 +153,7 @@ def forward(self, X: torch.Tensor) -> torch.Tensor:
     )
     args = parser.parse_args()

-    my_model = MyModel().to("cuda")
+    my_model = MyModel().to("cuda").eval()
     m = torch.full((64, 64), 2, device="cuda", dtype=torch.float)

     assert my_model(X=m)[0][0] == 3.0
@@ -167,8 +167,9 @@ def forward(self, X: torch.Tensor) -> torch.Tensor:
     )
     print("Model compiled successfully!")
     print("Running inference with compiled model...")
-    for i in range(10):
-        res = model_trt(m)
-        assert torch.allclose(res, my_model(m)), "Results do not match!"
+    with torch.no_grad():
+        for i in range(10):
+            res = model_trt(m)
+            assert torch.allclose(res, my_model(m)), "Results do not match!"

     print("Inference successful!")

examples/dynamo/auto_generate_converters.py

Lines changed: 5 additions & 4 deletions
@@ -169,14 +169,15 @@ def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
         return res


-my_model = MyModel().to("cuda")
+my_model = MyModel().to("cuda").eval()
 m = torch.full((64, 64), 2, device="cuda", dtype=torch.float)
 n = torch.full((64, 64), 3, device="cuda", dtype=torch.float)

 with torch_tensorrt.logging.errors():
     model_trt = torch_tensorrt.compile(my_model, inputs=[m, n], min_block_size=1)
-    for i in range(300):
-        res = model_trt(m, n)
-        assert torch.allclose(res, my_model(m, n))
+    with torch.no_grad():
+        for i in range(300):
+            res = model_trt(m, n)
+            assert torch.allclose(res, my_model(m, n))

 print("Ran with custom plugin!")

examples/dynamo/auto_generate_plugins.py

Lines changed: 5 additions & 4 deletions
@@ -139,14 +139,15 @@ def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
         return res


-my_model = MyModel().to("cuda")
+my_model = MyModel().to("cuda").eval()
 m = torch.randint(0, 5, (64, 64), device="cuda", dtype=torch.float)
 n = torch.randint(0, 5, (64, 64), device="cuda", dtype=torch.float)

 with torch_tensorrt.logging.errors():
     model_trt = torch_tensorrt.compile(my_model, inputs=[m, n], min_block_size=1)
-    for i in range(300):
-        res = model_trt(m, n)
-        assert torch.allclose(res, my_model(m, n))
+    with torch.no_grad():
+        for i in range(300):
+            res = model_trt(m, n)
+            assert torch.allclose(res, my_model(m, n))

 print("Ran with custom plugin!")

examples/dynamo/converter_overloading.py

Lines changed: 8 additions & 8 deletions
@@ -34,7 +34,7 @@ def forward(self, x):
         return torch.nn.functional.gelu(x, approximate=self.mode)


-my_mod = GeLU(mode="tanh")
+my_mod = GeLU(mode="tanh").to("cuda").eval()
 ex_input = torch.randn(2, 5).to("cuda")


@@ -182,9 +182,9 @@ def get_op_count():
 my_custom_gelu = torch_tensorrt.compile(
     my_mod, arg_inputs=(ex_input,), min_block_size=1
 )
-
-print(my_custom_gelu.graph)
-print(my_custom_gelu(ex_input))
+with torch.no_grad():
+    print(my_custom_gelu.graph)
+    print(my_custom_gelu(ex_input))

 # %%
 #
@@ -198,7 +198,7 @@ def get_op_count():
 #
 # Finally, we want to verify that in the case that the ``approximate`` argument is not set to ``tanh``, our custom converter is not used.

-my_mod_erf = GeLU(mode="none")
+my_mod_erf = GeLU(mode="none").to("cuda").eval()
 my_gelu_erf = torch_tensorrt.compile(
     my_mod_erf, arg_inputs=(ex_input,), min_block_size=1
 )
@@ -207,6 +207,6 @@ def get_op_count():
 #
 # Notice that we don't see the print statement from our custom converter, indicating that it was not used. However, looking at the graph, we can still see that a TensorRT engine was created to run the GeLU operation.
 # In this case, the validator for our custom converter returned ``False``, so the conversion system moved on to the next converter in the list, the standard GeLU converter and used that one to convert the operation.
-
-print(my_gelu_erf.graph)
-print(my_gelu_erf(ex_input))
+with torch.no_grad():
+    print(my_gelu_erf.graph)
+    print(my_gelu_erf(ex_input))

examples/dynamo/cross_runtime_compilation_for_windows.py

Lines changed: 3 additions & 2 deletions
@@ -46,7 +46,7 @@

 args = PARSER.parse_args()
 torch.manual_seed(0)
-model = models.resnet18().eval().cuda()
+model = models.resnet18().cuda().eval()
 input = torch.rand((1, 3, 224, 224)).to("cuda")
 inputs = [input]

@@ -63,7 +63,8 @@
     loaded_model = torchtrt.load_cross_compiled_exported_program(args.path).module()
     print(f"model has been successfully loaded from ${args.path}")
     # inference
-    trt_output = loaded_model(input)
+    with torch.no_grad():
+        trt_output = loaded_model(input)
     print(f"inference result: {trt_output}")
 else:
     if platform.system() != "Linux" or platform.architecture()[0] != "64bit":

examples/dynamo/custom_kernel_plugins.py

Lines changed: 7 additions & 4 deletions
@@ -217,8 +217,9 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         return y


-my_model = MyModel((1, 1, 2, 0)).to("cuda")
-my_model(ex_input)
+my_model = MyModel((1, 1, 2, 0)).to("cuda").eval()
+with torch.no_grad():
+    my_model(ex_input)

 ##############################################################################
 # .. code-block:: none
@@ -607,7 +608,8 @@ def circular_padding_converter(
 ##############################################
 # As you can see, now there is only one subgraph created for the TensorRT engine that contains both our custom kernel and the native convolution operator.

-print(trt_model(ex_input))
+with torch.no_grad():
+    print(trt_model(ex_input))

 ##############################################################################
 # .. code-block:: none
@@ -636,7 +638,8 @@ def circular_padding_converter(
 # %%
 # We can verify our implementation is run correctly by both TensorRT and PyTorch

-print(my_model(ex_input) - trt_model(ex_input))
+with torch.no_grad():
+    print(my_model(ex_input) - trt_model(ex_input))

 ##############################################################################
 # .. code-block:: none

examples/dynamo/engine_caching_bert_example.py

Lines changed: 2 additions & 1 deletion
@@ -62,7 +62,8 @@ def compile_bert(iterations=3):
             backend="torch_tensorrt",
             options=compilation_kwargs,
         )
-        optimized_model(*inputs)
+        with torch.no_grad():
+            optimized_model(*inputs)
         end.record()
         torch.cuda.synchronize()
         times.append(start.elapsed_time(end))
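
The context lines of this hunk show the timing harness the BERT example uses: CUDA events recorded around the call, with a synchronize before reading the elapsed time. A standalone sketch of that same pattern (the workload being timed is a placeholder):

import torch

start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)

start.record()
torch.relu(torch.randn(1024, 1024, device="cuda"))  # placeholder workload
end.record()

torch.cuda.synchronize()  # wait until both events have been reached on the GPU
print(start.elapsed_time(end))  # elapsed GPU time in milliseconds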

examples/dynamo/engine_caching_example.py

Lines changed: 5 additions & 3 deletions
@@ -37,7 +37,7 @@
 np.random.seed(0)
 torch.manual_seed(0)

-model = models.resnet18(pretrained=True).eval().to("cuda")
+model = models.resnet18(pretrained=True).to("cuda").eval()
 enabled_precisions = {torch.float}
 min_block_size = 1
 use_python_runtime = False
@@ -100,7 +100,8 @@ def torch_compile(iterations=3):
                 "reuse_cached_engines": reuse_cached_engines,
             },
         )
-        compiled_model(*inputs)  # trigger the compilation
+        with torch.no_grad():
+            compiled_model(*inputs)  # trigger the compilation
         end.record()
         torch.cuda.synchronize()
         times.append(start.elapsed_time(end))
@@ -270,7 +271,8 @@ def torch_compile_my_cache(iterations=3):
                 "custom_engine_cache": engine_cache,
             },
         )
-        compiled_model(*inputs)  # trigger the compilation
+        with torch.no_grad():
+            compiled_model(*inputs)  # trigger the compilation
         end.record()
         torch.cuda.synchronize()
         times.append(start.elapsed_time(end))

examples/dynamo/hierarchical_partitioner_example.py

Lines changed: 2 additions & 1 deletion
@@ -79,7 +79,8 @@ def main():

     print("Original Model Structure:\n", gm)

-    original_output = model(example_input)
+    with torch.no_grad():
+        original_output = model(example_input)

     # 1. Partition the model into blocks that can be executed by different backends
     partitioned_model, op_support = hierarchical_adjacency_partition(

examples/dynamo/llama2_flashinfer_rmsnorm.py

Lines changed: 3 additions & 2 deletions
@@ -220,7 +220,7 @@ def replace_rmsnorm(

 # 2. Initialize model (random weights)
 with torch.no_grad():
-    model = LlamaForCausalLM(config).eval().half()
+    model = LlamaForCausalLM(config).cuda().half().eval()

 # 3. Export with static shapes
 input_ids = torch.randint(0, 32000, (1, 64))  # Static [batch=1, seq=64]
@@ -253,5 +253,6 @@ def replace_rmsnorm(

 input_ids = input_ids.to(DEVICE)

-res = trt_model.forward(input_ids)
+with torch.no_grad():
+    res = trt_model.forward(input_ids)
 print(res)
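
As background for why the commit pins every example to eval mode: layers such as Dropout and BatchNorm behave differently in training and evaluation, so a train-mode reference model can disagree with its compiled TensorRT counterpart in the allclose checks above. A small illustration, independent of this diff:

import torch

drop = torch.nn.Dropout(p=0.5)
x = torch.ones(4)

drop.train()
print(drop(x))  # elements randomly zeroed, survivors scaled by 1 / (1 - p) = 2.0

drop.eval()
print(drop(x))  # identity mapping: eval() disables dropout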
