Commit 66c1cab

Merge branch 'master' of https://github.com/pytorch/TensorRT into trt_8.4ga

2 parents: 369fcd9 + 1625cd3

129 files changed: +6867, -1480 lines

.circleci/config.yml

Lines changed: 639 additions & 53 deletions (large diff not rendered)

.github/code-owners.yml

Lines changed: 1 addition & 1 deletion

@@ -110,7 +110,7 @@
   - "peri044"
   - "bowang007"
 
-"component: docker":
+"channel: docker":
   - "andi4191"
   - "narendasan"
 

.gitignore

Lines changed: 3 additions & 0 deletions

@@ -62,3 +62,6 @@ bazel-Torch-TensorRT-Preview
 docsrc/src/
 bazel-TensorRT
 bazel-tensorrt
+.pytest_cache
+*.cache
+*cifar-10-batches-py*

README.md

Lines changed: 5 additions & 4 deletions

@@ -2,13 +2,14 @@
 
 [![Documentation](https://img.shields.io/badge/docs-master-brightgreen)](https://nvidia.github.io/Torch-TensorRT/)
 
-> Ahead of Time (AOT) compiling for PyTorch JIT
+> Ahead of Time (AOT) compiling for PyTorch JIT and FX
 
-Torch-TensorRT is a compiler for PyTorch/TorchScript, targeting NVIDIA GPUs via NVIDIA's TensorRT Deep Learning Optimizer and Runtime. Unlike PyTorch's Just-In-Time (JIT) compiler, Torch-TensorRT is an Ahead-of-Time (AOT) compiler, meaning that before you deploy your TorchScript code, you go through an explicit compile step to convert a standard TorchScript program into an module targeting a TensorRT engine. Torch-TensorRT operates as a PyTorch extention and compiles modules that integrate into the JIT runtime seamlessly. After compilation using the optimized graph should feel no different than running a TorchScript module. You also have access to TensorRT's suite of configurations at compile time, so you are able to specify operating precision (FP32/FP16/INT8) and other settings for your module.
+Torch-TensorRT is a compiler for PyTorch/TorchScript/FX, targeting NVIDIA GPUs via NVIDIA's TensorRT Deep Learning Optimizer and Runtime. Unlike PyTorch's Just-In-Time (JIT) compiler, Torch-TensorRT is an Ahead-of-Time (AOT) compiler, meaning that before you deploy your TorchScript code, you go through an explicit compile step to convert a standard TorchScript or FX program into an module targeting a TensorRT engine. Torch-TensorRT operates as a PyTorch extention and compiles modules that integrate into the JIT runtime seamlessly. After compilation using the optimized graph should feel no different than running a TorchScript module. You also have access to TensorRT's suite of configurations at compile time, so you are able to specify operating precision (FP32/FP16/INT8) and other settings for your module.
 
 Resources:
 - [Documentation](https://nvidia.github.io/Torch-TensorRT/)
-- [Torch-TensorRT Explained in 2 minutes!](https://www.youtube.com/watch?v=TU5BMU6iYZ0&ab_channel=NVIDIADeveloper)
+- [FX path Documentation](https://github.com/pytorch/TensorRT/blob/master/docsrc/tutorials/getting_started_with_fx_path.rst)
+- [Torch-TensorRT Explained in 2 minutes!](https://www.youtube.com/watch?v=TU5BMU6iYZ0&ab_channel=NVIDIADeveloper)
 - [Comprehensive Discusion (GTC Event)](https://www.nvidia.com/en-us/on-demand/session/gtcfall21-a31107/)
 - [Pre-built Docker Container](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch). To use this container, make an NGC account and sign in to NVIDIA's registry with an API key. Refer to [this guide](https://docs.nvidia.com/ngc/ngc-catalog-user-guide/index.html#registering-activating-ngc-account) for the same.
 
@@ -213,7 +214,7 @@ bazel build //:libtorchtrt --compilation_mode opt
 ```
 
 ### FX path (Python only) installation
-If the user plan to try FX path (Python only) and would like to avoid bazel build. Please follow the steps below.
+If the user plans to try FX path (Python only) and would like to avoid bazel build. Please follow the steps below.
 ``` shell
 cd py && python3 setup.py install --fx-only
 ```
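
For context on the explicit AOT compile step and compile-time precision settings the README paragraph above describes, here is a minimal sketch using the project's Python API; the `Net` module and input shape are illustrative assumptions, not part of this commit:

```python
import torch
import torch_tensorrt

# Hypothetical stand-in model; any TorchScript-compatible module works.
class Net(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.fc = torch.nn.Linear(8, 4)

    def forward(self, x):
        return torch.relu(self.fc(x))

model = Net().eval().cuda()

# Explicit AOT compile step: fix the input shape and allowed precisions.
trt_mod = torch_tensorrt.compile(
    model,
    inputs=[torch_tensorrt.Input((1, 8))],
    enabled_precisions={torch.float, torch.half},  # FP32/FP16 kernels allowed
)

out = trt_mod(torch.randn(1, 8).cuda())  # behaves like a TorchScript module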

core/conversion/converters/converter_util.cpp

Lines changed: 125 additions & 0 deletions

@@ -200,6 +200,131 @@ nvinfer1::ITensor* tensor_to_const(ConversionCtx* ctx, at::Tensor t, const std::
   return out;
 }
 
+// clamp x to [lower_bound, upper_bound]
+nvinfer1::ITensor* clamp(
+    ConversionCtx* ctx,
+    nvinfer1::ITensor* x,
+    nvinfer1::ITensor* lower_bound,
+    nvinfer1::ITensor* upper_bound,
+    std::string const& name) {
+
+  auto max_layer = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kMAX, x, lower_bound, "max layer for " + name);
+  TORCHTRT_CHECK(max_layer, "Unable to create max layer for clamp");
+  LOG_DEBUG(ctx->logger, "Create " << max_layer->getName() << " for clamp");
+  auto max_itensor = max_layer->getOutput(0);
+
+  auto min_layer = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kMIN, max_itensor, upper_bound, "min layer for " + name);
+  TORCHTRT_CHECK(min_layer, "Unable to create min layer for clamp");
+  LOG_DEBUG(ctx->logger, "Create " << min_layer->getName() << " for clamp");
+  auto min_itensor = min_layer->getOutput(0);
+  return min_itensor;
+}
+
+// clamp x to [0, input_dim]
+nvinfer1::ITensor* clamp_to_input_dim(
+    ConversionCtx* ctx,
+    nvinfer1::ITensor* x,
+    nvinfer1::ITensor* input_dim,
+    int nbdims,
+    std::string const& name) {
+
+  auto zero = torch::zeros({nbdims}).to(torch::kI32);
+  auto zero_itensor = tensor_to_const(ctx, zero);
+  auto one = torch::ones({nbdims}).to(torch::kI32);
+  auto one_itensor = tensor_to_const(ctx, one);
+
+  auto upper_bound_layer = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kSUB, input_dim, one_itensor, "sub layer for " + name);
+  TORCHTRT_CHECK(upper_bound_layer, "Unable to create sub layer for clamp to inputDim");
+  LOG_DEBUG(ctx->logger, "Create " << upper_bound_layer->getName() << " for clamp to inputDim");
+  auto upper_bound = upper_bound_layer->getOutput(0);
+
+  auto max_layer = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kMAX, x, zero_itensor, "max layer for " + name);
+  TORCHTRT_CHECK(max_layer, "Unable to create max_layer for clamp to inputDim");
+  LOG_DEBUG(ctx->logger, "Create " << max_layer->getName() << " for clamp to inputDim");
+  auto max_itensor = max_layer->getOutput(0);
+
+  auto min_layer = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kMIN, max_itensor, upper_bound, "min layer for " + name);
+  TORCHTRT_CHECK(min_layer, "Unable to create min_layer for clamp to inputDim");
+  LOG_DEBUG(ctx->logger, "Create " << min_layer->getName() << " for clamp to inputDim");
+  auto min_itensor = min_layer->getOutput(0);
+  return min_itensor;
+}
+
+// return indices < 0 ? inputDims + indices : indices
+nvinfer1::ITensor* normalize_indices(
+    ConversionCtx* ctx,
+    nvinfer1::ITensor* input_dim,
+    nvinfer1::ITensor* indices,
+    int nbdims,
+    std::string const& name) {
+
+  auto zero = torch::zeros({nbdims}).to(torch::kI32);
+  auto neg = -torch::ones({nbdims}).to(torch::kI32);
+  auto zero_itensor = tensor_to_const(ctx, zero);
+  auto neg_itensor = tensor_to_const(ctx, neg);
+  // find the indices that = -1
+  auto signs = clamp(ctx, indices, neg_itensor, zero_itensor, "clamp layer for " + name);
+
+  // get the inputDim value where indices == -1, else 0
+  auto mul = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kPROD, signs, input_dim, "prod layer for " + name);
+  TORCHTRT_CHECK(mul, "Unable to create mul layer in normalize_indices");
+  LOG_DEBUG(ctx->logger, "Create " << mul->getName() << " for normalize_indices");
+  auto mul_itensor = mul->getOutput(0);
+
+  // add the inputDim value to indices where indices == -1
+  auto sub = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kSUB, indices, mul_itensor, "sub layer for " + name);
+  TORCHTRT_CHECK(sub, "Unable to create sub layer in normalize_indices");
+  LOG_DEBUG(ctx->logger, "Create " << sub->getName() << " for normalize_indices");
+  auto sub_itensor = sub->getOutput(0);
+  return sub_itensor;
+}
+
+std::vector<nvinfer1::ITensor*> normalize_start_and_end(
+    ConversionCtx* ctx,
+    nvinfer1::ITensor* in_shape,
+    nvinfer1::ITensor* in_start,
+    nvinfer1::ITensor* in_end,
+    int nbdims,
+    std::string const& name) {
+  auto start = normalize_indices(ctx, in_shape, in_start, nbdims, "normalize start of " + name);
+  auto out_start = clamp_to_input_dim(ctx, start, in_shape, nbdims, "clamp start to inputDim for " + name);
+  auto end = normalize_indices(ctx, in_shape, in_end, nbdims, "normalize end of " + name);
+  auto out_end = clamp_to_input_dim(ctx, end, in_shape, nbdims, "clamp end to inputDim for " + name);
+  std::vector<nvinfer1::ITensor*> outputs;
+  outputs.push_back(out_start);
+  outputs.push_back(out_end);
+  return outputs;
+}
+
+// size = (end - start) / stride + 1, where range is [start, end], end is included
+nvinfer1::ITensor* get_slice_size(
+    ConversionCtx* ctx,
+    nvinfer1::ITensor* start,
+    nvinfer1::ITensor* end,
+    nvinfer1::ITensor* stride,
+    int nbdims,
+    std::string const& name) {
+  at::Tensor one_tensor = torch::ones({nbdims}).to(torch::kI32);
+  auto one_itensor = tensor_to_const(ctx, one_tensor);
+
+  auto sub_layer = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kSUB, end, start, "get_slice_size sub layer for " + name);
+  TORCHTRT_CHECK(sub_layer, "Unable to create sub layer in calculate_output_size");
+  LOG_DEBUG(ctx->logger, "Create " << sub_layer->getName() << " for calculate_output_size");
+  auto sub_itensor = sub_layer->getOutput(0);
+
+  auto div_layer = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kDIV, sub_itensor, stride, "get_slice_size div layer for " + name);
+  TORCHTRT_CHECK(div_layer, "Unable to create div layer in calculate_output_size");
+  LOG_DEBUG(ctx->logger, "Create " << div_layer->getName() << " for calculate_output_size");
+  auto div_itensor = div_layer->getOutput(0);
+
+  auto add_layer = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kSUM, div_itensor, one_itensor, "get_slice_size sum layer for " + name);
+  TORCHTRT_CHECK(add_layer, "Unable to create add layer in calculate_output_size");
+  LOG_DEBUG(ctx->logger, "Create " << add_layer->getName() << " for calculate_output_size");
+  auto size_itensor = add_layer->getOutput(0);
+
+  return size_itensor;
+}
+
 } // namespace converters
 } // namespace conversion
 } // namespace core
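
The new helpers build this index arithmetic out of TensorRT elementwise layers, which makes the intent hard to read from the layer plumbing alone. A plain-Python sketch of the same per-dimension math (scalar stand-ins; function names mirror the C++ helpers but are illustrative, not part of the commit):

```python
def normalize_index(idx, dim):
    # return indices < 0 ? inputDims + indices : indices
    return dim + idx if idx < 0 else idx

def clamp_to_input_dim(x, dim):
    # clamp x to [0, dim - 1]
    return max(0, min(x, dim - 1))

def slice_size(start, end, stride):
    # size = (end - start) / stride + 1, where end is included in the range
    return (end - start) // stride + 1

# Example: a dimension of size 10 with start index -3 and end index -1.
dim = 10
start = clamp_to_input_dim(normalize_index(-3, dim), dim)  # -> 7
end = clamp_to_input_dim(normalize_index(-1, dim), dim)    # -> 9
print(slice_size(start, end, stride=1))                    # -> 3 (indices 7, 8, 9)
```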

core/conversion/converters/converter_util.h

Lines changed: 30 additions & 0 deletions

@@ -2,6 +2,7 @@
 
 #include <map>
 #include <string>
+#include <limits>
 
 #include "core/conversion/conversionctx/ConversionCtx.h"
 #include "core/conversion/converters/Weights.h"
@@ -50,6 +51,35 @@ nvinfer1::ITensor* castITensor(ConversionCtx* ctx, nvinfer1::ITensor* tensor, nv
 // Freeze an at::Tensor in a IConstant layer
 nvinfer1::ITensor* tensor_to_const(ConversionCtx* ctx, at::Tensor t, const std::string& name = std::string());
 
+nvinfer1::ITensor* clamp(
+    ConversionCtx* ctx,
+    nvinfer1::ITensor* x,
+    nvinfer1::ITensor* lower_bound,
+    nvinfer1::ITensor* upper_bound,
+    std::string const& name);
+
+nvinfer1::ITensor* normalize_indices(
+    ConversionCtx* ctx,
+    nvinfer1::ITensor* input_dim,
+    nvinfer1::ITensor* indices,
+    std::string const& name);
+
+std::vector<nvinfer1::ITensor*> normalize_start_and_end(
+    ConversionCtx* ctx,
+    nvinfer1::ITensor* in_shape,
+    nvinfer1::ITensor* in_start,
+    nvinfer1::ITensor* in_end,
+    int nbdims,
+    std::string const& name);
+
+nvinfer1::ITensor* get_slice_size(
+    ConversionCtx* ctx,
+    nvinfer1::ITensor* start,
+    nvinfer1::ITensor* end,
+    nvinfer1::ITensor* stride,
+    int nbdims,
+    std::string const& name);
+
 } // namespace converters
 } // namespace conversion
 } // namespace core
