
Commit 7a3cede

squashed and cleaned the commits
1 parent 98f54fe commit 7a3cede

5 files changed, +766 -0 lines

5 files changed

+766
-0
lines changed

py/torch_tensorrt/dynamo/_compiler.py

Lines changed: 15 additions & 0 deletions
@@ -105,6 +105,7 @@ def cross_compile_for_windows(
     l2_limit_for_tiling: int = _defaults.L2_LIMIT_FOR_TILING,
     offload_module_to_cpu: bool = _defaults.OFFLOAD_MODULE_TO_CPU,
     use_distributed_mode_trace: bool = _defaults.USE_DISTRIBUTED_MODE_TRACE,
+    cpu_memory_budget: int = _defaults.CPU_MEMORY_BUDGET,
     **kwargs: Any,
 ) -> torch.fx.GraphModule:
     """Compile an ExportedProgram module using TensorRT in Linux for Inference in Windows
@@ -179,6 +180,7 @@ def cross_compile_for_windows(
         tiling_optimization_level (str): The optimization level of tiling strategies. A higher level allows TensorRT to spend more time searching for better tiling strategy. We currently support ["none", "fast", "moderate", "full"].
         l2_limit_for_tiling (int): The target L2 cache usage limit (in bytes) for tiling optimization (default is -1 which means no limit).
         use_distributed_mode_trace (bool): Using aot_autograd to trace the graph. This is enabled when DTensors or distributed tensors are present in distributed model
+        cpu_memory_budget (int): The maximum amount of CPU memory to use for the compilation. If the compilation requires more memory than this budget, the compilation will fail. If set to -1, the compilation will use all available CPU memory.
         **kwargs: Any,
     Returns:
         torch.fx.GraphModule: Compiled FX Module, when run it will execute via TensorRT
@@ -334,6 +336,7 @@ def cross_compile_for_windows(
         "tiling_optimization_level": tiling_optimization_level,
         "l2_limit_for_tiling": l2_limit_for_tiling,
         "use_distributed_mode_trace": use_distributed_mode_trace,
+        "cpu_memory_budget": cpu_memory_budget,
     }

     # disable the following settings is not supported for cross compilation for windows feature
@@ -435,6 +438,7 @@ def compile(
     l2_limit_for_tiling: int = _defaults.L2_LIMIT_FOR_TILING,
     offload_module_to_cpu: bool = _defaults.OFFLOAD_MODULE_TO_CPU,
     use_distributed_mode_trace: bool = _defaults.USE_DISTRIBUTED_MODE_TRACE,
+    cpu_memory_budget: int = _defaults.CPU_MEMORY_BUDGET,
     **kwargs: Any,
 ) -> torch.fx.GraphModule:
     """Compile an ExportedProgram module for NVIDIA GPUs using TensorRT
@@ -681,6 +685,7 @@ def compile(
         "l2_limit_for_tiling": l2_limit_for_tiling,
         "offload_module_to_cpu": offload_module_to_cpu,
         "use_distributed_mode_trace": use_distributed_mode_trace,
+        "cpu_memory_budget": cpu_memory_budget,
     }
     logger.debug(f"CPU memory usage before lowering: {get_cpu_memory_usage()} MB")
     settings = CompilationSettings(**compilation_options)
@@ -854,6 +859,16 @@ def preserve_module_specs(
         require_full_compilation=settings.require_full_compilation,
     )

+    from torch_tensorrt.dynamo.partitioning._resource_partitioner import (
+        resource_partition,
+    )
+
+    partitioned_module = resource_partition(
+        gm,
+        partitioned_module,
+        cpu_memory_budget=settings.cpu_memory_budget,
+    )
+
     dryrun_tracker.unsupported_ops = supported_ops.unsupported_operators

     # The global partitioner leaves non-TRT nodes as-is
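
Taken together, these changes thread the new cpu_memory_budget knob from the public compile() and cross_compile_for_windows() entry points down into a resource-aware partitioning pass (resource_partition). A minimal usage sketch follows; the toy model and the 8 GiB figure are illustrative assumptions, not part of this commit:

    import torch
    import torch_tensorrt

    # Hypothetical toy model; any exportable module would do.
    model = (
        torch.nn.Sequential(
            torch.nn.Linear(64, 128),
            torch.nn.ReLU(),
            torch.nn.Linear(128, 8),
        )
        .cuda()
        .eval()
    )

    inputs = [torch.randn(1, 64).cuda()]
    exported = torch.export.export(model, tuple(inputs))

    # Cap compilation at roughly 8 GiB of host RAM. Per the new docstring,
    # exceeding the budget fails the compilation; -1 uses all available memory.
    trt_module = torch_tensorrt.dynamo.compile(
        exported,
        inputs=inputs,
        cpu_memory_budget=8 * 1024**3,
    )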

py/torch_tensorrt/dynamo/_defaults.py

Lines changed: 2 additions & 0 deletions
@@ -2,6 +2,7 @@
 import platform
 import tempfile

+import psutil
 import torch
 from torch_tensorrt._Device import Device
 from torch_tensorrt._enums import EngineCapability, dtype
@@ -57,6 +58,7 @@
 L2_LIMIT_FOR_TILING = -1
 USE_DISTRIBUTED_MODE_TRACE = False
 OFFLOAD_MODULE_TO_CPU = False
+CPU_MEMORY_BUDGET = psutil.virtual_memory().available

 if platform.system() == "Linux":
     import pwd
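
Note that the default is evaluated once, at import time, so it snapshots whatever host memory is free when torch_tensorrt is first imported rather than when compile() is called. A small sketch of what the expression measures (assuming psutil is installed in the environment, as the new import requires):

    import psutil

    # virtual_memory() reports bytes; .available is the memory that can be
    # handed out without swapping, which this commit uses as the default budget.
    vm = psutil.virtual_memory()
    print(f"available: {vm.available / 1024**2:.0f} MB of {vm.total / 1024**2:.0f} MB")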

py/torch_tensorrt/dynamo/_settings.py

Lines changed: 2 additions & 0 deletions
@@ -7,6 +7,7 @@
 from torch_tensorrt.dynamo._defaults import (
     ASSUME_DYNAMIC_SHAPE_SUPPORT,
     CACHE_BUILT_ENGINES,
+    CPU_MEMORY_BUDGET,
     DISABLE_TF32,
     DLA_GLOBAL_DRAM_SIZE,
     DLA_LOCAL_DRAM_SIZE,
@@ -140,6 +141,7 @@ class CompilationSettings:
     l2_limit_for_tiling: int = L2_LIMIT_FOR_TILING
     use_distributed_mode_trace: bool = USE_DISTRIBUTED_MODE_TRACE
     offload_module_to_cpu: bool = OFFLOAD_MODULE_TO_CPU
+    cpu_memory_budget: int = CPU_MEMORY_BUDGET

     def __getstate__(self) -> dict[str, Any]:
         from torch_tensorrt.dynamo.conversion._ConverterRegistry import (
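
With the field on CompilationSettings, the budget travels through the same plumbing as every other option, including the __getstate__ serialization path above. A minimal sketch of overriding the import-time default, assuming the dataclass-style constructor the rest of this file relies on:

    from torch_tensorrt.dynamo._settings import CompilationSettings

    # Replace the import-time default with an explicit 4 GiB budget.
    settings = CompilationSettings(cpu_memory_budget=4 * 1024**3)
    print(settings.cpu_memory_budget)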
