
Commit a229e53

Author: Wei Wei (committed)
[fx2trt] 1) restructure fx2trt codebase; 2) add fx2trt in compile(); 3) add test_fx2trt.py example
1 parent e9fad34 commit a229e53

File tree

144 files changed: +26201 -10 lines changed


py/torch_tensorrt/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -17,6 +17,7 @@
 from torch_tensorrt._Input import Input
 from torch_tensorrt._Device import Device

+from torch_tensorrt import fx

 def _register_with_torch():
     trtorch_dir = os.path.dirname(__file__)

py/torch_tensorrt/_compile.py

Lines changed: 75 additions & 10 deletions
@@ -5,7 +5,7 @@
 import torch
 from torch import fx
 from enum import Enum
-
+from torch_tensorrt import fx

 class _IRType(Enum):
     """Enum to set the minimum required logging level to print a message to stdout
@@ -43,13 +43,7 @@ def _get_target_ir(module_type: _ModuleType, ir: str) -> _IRType:
     if module_is_tsable and ir_targets_torchscript:
         return _IRType.ts
     elif module_is_fxable and ir_targets_fx:
-        if module_type == _ModuleType.fx:
-            raise ValueError("Was given a torch.fx.GraphModule, fx is not currently supported by Torch-TensorRT")
-        elif ir_targets_fx:
-            raise ValueError("Preferred ir was set to \"fx\" which is currently not supported by Torch-TensorRT")
-        else:
-            raise ValueError("Torch-TensorRT currently does not support fx")
-        # return _IRType.fx
+        return _IRType.fx
     else:
         if ir == "default":
             # Options are listed in order of preference
@@ -114,7 +108,78 @@ def compile(module: Any, ir="default", inputs=[], enabled_precisions=set([_enums
         ts_mod = torch.jit.script(module)
         return torch_tensorrt.ts.compile(ts_mod, inputs=inputs, enabled_precisions=enabled_precisions, **kwargs)
     elif target_ir == _IRType.fx:
-        raise RuntimeError("fx is currently not supported")
+        from torch_tensorrt.fx.tracer.acc_tracer import acc_tracer
+        from torch_tensorrt.fx import InputTensorSpec
+        from torch_tensorrt.fx import TRTInterpreter
+        from torch_tensorrt.fx.passes.lower_basic_pass import transform_setitem
+        from torch_tensorrt.fx.tools.trt_splitter import TRTSplitter
+        from torch_tensorrt.fx.tools.trt_splitter import TRTSplitterSetting
+        from torch_tensorrt.fx.trt_module import TRTModule
+        from torch_tensorrt.fx.utils import LowerPrecision
+        acc_model = acc_tracer.trace(module, inputs)
+
+        splitter_setting = TRTSplitterSetting()
+        splitter_setting.use_implicit_batch_dim = False
+        splitter = TRTSplitter(acc_model, inputs, settings=splitter_setting)
+        splitter.node_support_preview()
+        split_mod = splitter()
+        num_piece = 0
+        for name, _ in split_mod.named_children():
+            print(f"graph is split into {name}")
+            num_piece += 1
+
+        # If the graph module is split into more than 8 pieces, we consider its perf
+        # is not good and fall back to the non-TRT module.
+        if num_piece > 8:
+            print(
+                f"The graph module is split into {num_piece} pieces, which is larger "
+                "than the threshold of 8. Falling back to the non-TRT module."
+            )
+            return None
+
+        if torch.float16 in enabled_precisions or torch.half in enabled_precisions:
+            precision = LowerPrecision.FP16
+        else:
+            precision = LowerPrecision.FP32
+
+        def get_submod_inputs(mod, submod, inputs):
+            acc_inputs = None
+
+            def get_input(self, inputs):
+                nonlocal acc_inputs
+                acc_inputs = inputs
+
+            handle = submod.register_forward_pre_hook(get_input)
+            mod(*inputs)
+            handle.remove()
+            return acc_inputs
+
+        for name, _ in split_mod.named_children():
+            if "_run_on_acc" in name:
+                submod = getattr(split_mod, name)
+                # Get submodule inputs for fx2trt
+                acc_inputs = get_submod_inputs(split_mod, submod, inputs)
+
+                # fx2trt replacement
+                interp = TRTInterpreter(
+                    submod,
+                    InputTensorSpec.from_tensors(acc_inputs),
+                    explicit_batch_dimension=True,
+                )
+                r = interp.run(
+                    max_workspace_size=20 << 30,
+                    lower_precision=precision,
+                    # profiling_verbosity=trt.ProfilingVerbosity.DETAILED,  # for profiling
+                )
+                # For profiling:
+                # from fx2trt_oss.fx.tools.trt_profiler_sorted import profile_trt_module
+                # profile_trt_module("", trt_mod, acc_inputs)
+                trt_mod = TRTModule(*r)
+
+                setattr(split_mod, name, trt_mod)
+            else:
+                submod = getattr(split_mod, name)
+        return split_mod
     else:
         raise RuntimeError("Module is an unknown format or the ir requested is unknown")

@@ -173,4 +238,4 @@ def convert_method_to_trt_engine(module: Any,
     elif target_ir == _IRType.fx:
         raise RuntimeError("fx is currently not supported")
     else:
-        raise RuntimeError("Module is an unknown format or the ir requested is unknown")
+        raise RuntimeError("Module is an unknown format or the ir requested is unknown")
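
With this change, compile() routes modules through fx2trt when ir="fx": the module is traced with acc_tracer, split with TRTSplitter, and every "_run_on_acc" submodule is replaced by a TRTModule. A minimal usage sketch, assuming a CUDA machine with TensorRT installed; the toy model and input shape are illustrative and not part of this commit:

import torch
import torch_tensorrt

# Hypothetical toy model, used only for illustration.
model = torch.nn.Sequential(
    torch.nn.Conv2d(3, 16, kernel_size=3, padding=1),
    torch.nn.ReLU(),
).eval().cuda()

inputs = [torch.randn(1, 3, 224, 224, device="cuda")]

# ir="fx" now resolves to _IRType.fx instead of raising; the result is the split
# GraphModule with its TRT-compatible children swapped for TRTModules.
trt_mod = torch_tensorrt.compile(
    model,
    ir="fx",
    inputs=inputs,
    enabled_precisions={torch.half},
)
out = trt_mod(*inputs)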

py/torch_tensorrt/fx/__init__.py

Lines changed: 10 additions & 0 deletions
@@ -0,0 +1,10 @@
+from .converters import *  # noqa: F403 F401
+from .converter_registry import (  # noqa
+    CONVERTERS,
+    NO_EXPLICIT_BATCH_DIM_SUPPORT,
+    NO_IMPLICIT_BATCH_DIM_SUPPORT,
+    tensorrt_converter,
+)
+from .fx2trt import TRTInterpreter, TRTInterpreterResult  # noqa
+from .input_tensor_spec import InputTensorSpec  # noqa
+from .trt_module import TRTModule  # noqa
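
The new package __init__ re-exports the core fx2trt entry points, so downstream code can import them directly from torch_tensorrt.fx. A small sketch, assuming a TensorRT-enabled environment; the example tensor is illustrative:

import torch
from torch_tensorrt.fx import InputTensorSpec, TRTInterpreter, TRTModule

# Input specs can be built straight from example tensors, as _compile.py does above.
specs = InputTensorSpec.from_tensors([torch.randn(1, 3, 224, 224, device="cuda")])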
py/torch_tensorrt/fx/converter_registry.py

Lines changed: 31 additions & 0 deletions

@@ -0,0 +1,31 @@
+from typing import Any, Callable, Dict
+
+from torch.fx.node import Target
+
+
+CONVERTERS: Dict[Target, Any] = {}
+NO_IMPLICIT_BATCH_DIM_SUPPORT = {}
+NO_EXPLICIT_BATCH_DIM_SUPPORT = {}
+
+
+def tensorrt_converter(
+    key: Target,
+    no_implicit_batch_dim: bool = False,
+    no_explicit_batch_dim: bool = False,
+    enabled: bool = True,
+) -> Callable[[Any], Any]:
+    def register_converter(converter):
+        CONVERTERS[key] = converter
+        if no_implicit_batch_dim:
+            NO_IMPLICIT_BATCH_DIM_SUPPORT[key] = converter
+        if no_explicit_batch_dim:
+            NO_EXPLICIT_BATCH_DIM_SUPPORT[key] = converter
+        return converter
+
+    def disable_converter(converter):
+        return converter
+
+    if enabled:
+        return register_converter
+    else:
+        return disable_converter
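
The registry maps an fx Target (a function or module type) to the converter that emits the corresponding TensorRT layers. A hedged sketch of registering a custom converter; the target function, the converter body, and the (network, target, args, kwargs, name) signature follow the usual fx2trt convention and are illustrative, not part of this commit:

from torch_tensorrt.fx.converter_registry import (
    CONVERTERS,
    NO_IMPLICIT_BATCH_DIM_SUPPORT,
    tensorrt_converter,
)

def my_scale(x, factor):  # hypothetical target key, used only for illustration
    return x * factor

@tensorrt_converter(my_scale, no_implicit_batch_dim=True)
def my_scale_converter(network, target, args, kwargs, name):
    # A real converter would add TensorRT layers to `network` and return their output.
    raise NotImplementedError("illustrative stub")

assert CONVERTERS[my_scale] is my_scale_converter
assert my_scale in NO_IMPLICIT_BATCH_DIM_SUPPORT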
py/torch_tensorrt/fx/converters/__init__.py

Lines changed: 18 additions & 0 deletions

@@ -0,0 +1,18 @@
+# @manual=//deeplearning/trt/python:py_tensorrt
+import tensorrt as trt
+
+if hasattr(trt, "__version__"):
+    from .activation import *  # noqa: F401 F403
+    from .adaptive_avgpool import *  # noqa: F401 F403
+    from .add import *  # noqa: F401 F403
+    from .batchnorm import *  # noqa: F401 F403
+    from .convolution import *  # noqa: F401 F403
+    from .linear import *  # noqa: F401 F403
+    from .maxpool import *  # noqa: F401 F403
+    from .mul import *  # noqa: F401 F403
+    from .transformation import *  # noqa: F401 F403
+    from .quantization import *  # noqa: F401 F403
+    from .acc_ops_converters import *  # noqa: F401 F403
+
+    TRT_LOGGER = trt.Logger()
+    trt.init_libnvinfer_plugins(TRT_LOGGER, "")
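
Because registration happens as a side effect of these star imports, simply importing the converters package (guarded on a working tensorrt install) populates the registry. A minimal check, assuming TensorRT is available:

import torch_tensorrt.fx.converters  # noqa: F401  -- star imports register the built-in converters
from torch_tensorrt.fx.converter_registry import CONVERTERS

print(f"{len(CONVERTERS)} converters registered")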
