Commit 543e2a5 — Merge branch 'master' into peri044/dtype_layout

2 parents: bde8ee0 + b652045
40 files changed: +2613 −1671 lines

README.md

Lines changed: 5 additions & 4 deletions

@@ -6,10 +6,11 @@
 Torch-TensorRT is a compiler for PyTorch/TorchScript, targeting NVIDIA GPUs via NVIDIA's TensorRT Deep Learning Optimizer and Runtime. Unlike PyTorch's Just-In-Time (JIT) compiler, Torch-TensorRT is an Ahead-of-Time (AOT) compiler, meaning that before you deploy your TorchScript code, you go through an explicit compile step to convert a standard TorchScript program into a module targeting a TensorRT engine. Torch-TensorRT operates as a PyTorch extension and compiles modules that integrate into the JIT runtime seamlessly. After compilation, using the optimized graph should feel no different from running a TorchScript module. You also have access to TensorRT's suite of configurations at compile time, so you are able to specify operating precision (FP32/FP16/INT8) and other settings for your module.

-More Information / System Architecture:
-
-- [GTC 2020 Talk](https://developer.nvidia.com/gtc/2020/video/s21671)
-
+Resources:
+- [Documentation](https://nvidia.github.io/Torch-TensorRT/)
+- [Torch-TensorRT Explained in 2 minutes!](https://www.youtube.com/watch?v=TU5BMU6iYZ0&ab_channel=NVIDIADeveloper)
+- [Comprehensive Discussion (GTC Event)](https://www.nvidia.com/en-us/on-demand/session/gtcfall21-a31107/)
+- [Pre-built Docker Container](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch). To use this container, make an NGC account and sign in to NVIDIA's registry with an API key; see [this guide](https://docs.nvidia.com/ngc/ngc-catalog-user-guide/index.html#registering-activating-ngc-account) for details.

 ## Building a docker container for Torch-TensorRT

core/lowering/lowering.cpp

Lines changed: 1 addition & 0 deletions

@@ -45,6 +45,7 @@ void LowerGraph(std::shared_ptr<torch::jit::Graph>& g, LowerInfo lower_info) {
   passes::ReduceToOperation(g);
   passes::ReduceGelu(g);
   passes::RemoveContiguous(g);
+  passes::ViewToReshape(g);
   passes::RemoveDropout(g);
   passes::LinearToAddMM(g);
   passes::Conv1DToConvolution(g);
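The hunk above registers the new ViewToReshape pass in LowerGraph's ordered pass list, where each pass rewrites the TorchScript graph in place before conversion. As a rough sketch of that pipeline shape (plain Python with hypothetical pass names modeled on the real ones, and a graph reduced to a flat list of op names — not the Torch-TensorRT API):

```python
# Toy lowering pipeline: each pass maps graph -> graph, applied in order.
# Names and the flat-list "graph" are illustrative, not the real IR.
def remove_contiguous(graph):
    # Drop aten::contiguous calls; they are no-ops for conversion purposes.
    return [op for op in graph if op != "aten::contiguous"]

def view_to_reshape(graph):
    # Map aten::view to aten::reshape (what the new C++ pass does).
    return ["aten::reshape" if op == "aten::view" else op for op in graph]

def lower_graph(graph, passes):
    # Run the registered passes in order, as LowerGraph does in C++.
    for p in passes:
        graph = p(graph)
    return graph

graph = ["aten::contiguous", "aten::view", "aten::linear"]
print(lower_graph(graph, [remove_contiguous, view_to_reshape]))
# -> ['aten::reshape', 'aten::linear']
```

Ordering matters in such a pipeline: a later pass may depend on earlier passes having already normalized the graph.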

core/lowering/passes/BUILD

Lines changed: 1 addition & 0 deletions

@@ -20,6 +20,7 @@ cc_library(
   "reduce_gelu.cpp",
   "remove_bn_dim_check.cpp",
   "remove_contiguous.cpp",
+  "view_to_reshape.cpp",
   "remove_dropout.cpp",
   "remove_nops.cpp",
   "silu_to_sigmoid_multiplication.cpp",

core/lowering/passes/passes.h

Lines changed: 1 addition & 0 deletions

@@ -24,6 +24,7 @@ void ReduceGelu(std::shared_ptr<torch::jit::Graph>& graph);
 void MarkNodesForFallback(std::shared_ptr<torch::jit::Graph>& g, bool delete_delims);
 void RemoveBNDimCheck(std::shared_ptr<torch::jit::Graph> graph);
 void RemoveContiguous(std::shared_ptr<torch::jit::Graph>& graph);
+void ViewToReshape(std::shared_ptr<torch::jit::Graph>& graph);
 void RemoveDropout(std::shared_ptr<torch::jit::Graph>& graph);
 void RemoveNOPs(std::shared_ptr<torch::jit::Graph> graph);
 void UnpackAddMM(std::shared_ptr<torch::jit::Graph>& graph);
core/lowering/passes/view_to_reshape.cpp (new file)

Lines changed: 31 additions & 0 deletions

@@ -0,0 +1,31 @@
+#include <torch/csrc/jit/passes/subgraph_rewrite.h>
+#include "core/util/prelude.h"
+
+namespace torch_tensorrt {
+namespace core {
+namespace lowering {
+namespace passes {
+
+void ViewToReshape(std::shared_ptr<torch::jit::Graph>& graph) {
+  std::string view_pattern = R"IR(
+    graph(%x, %1):
+      %out : Tensor = aten::view(%x, %1)
+      return (%out))IR";
+
+  std::string reshape_pattern = R"IR(
+    graph(%x, %1):
+      %out : Tensor = aten::reshape(%x, %1)
+      return (%out))IR";
+
+  // replace aten::view with aten::reshape
+  torch::jit::SubgraphRewriter map_view_to_reshape;
+  map_view_to_reshape.RegisterRewritePattern(view_pattern, reshape_pattern);
+  map_view_to_reshape.runOnGraph(graph);
+
+  LOG_GRAPH("Post lowering of aten::view -> " << *graph);
+}
+
+} // namespace passes
+} // namespace lowering
+} // namespace core
+} // namespace torch_tensorrt
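The pass above uses torch::jit::SubgraphRewriter to match every `aten::view` call and swap it for `aten::reshape` with the same inputs and output. (In PyTorch, `view` requires contiguous storage while `reshape` copies when necessary, so the substitution is a safe generalization.) A minimal single-op analogue of that rewriter in plain Python — the real SubgraphRewriter matches whole IR subgraphs, and all names here are illustrative:

```python
# Toy subgraph rewriter over a graph represented as (op, inputs, output) tuples.
class ToyRewriter:
    def __init__(self):
        self.patterns = []  # (pattern_op, replacement_op) pairs

    def register_rewrite_pattern(self, pattern_op, replacement_op):
        self.patterns.append((pattern_op, replacement_op))

    def run_on_graph(self, graph):
        rewritten = []
        for op, inputs, output in graph:
            for pat, rep in self.patterns:
                if op == pat:
                    op = rep  # same operands, different op name
                    break
            rewritten.append((op, inputs, output))
        return rewritten

graph = [("aten::view", ("%x", "%1"), "%out"),
         ("aten::relu", ("%out",), "%y")]
rw = ToyRewriter()
rw.register_rewrite_pattern("aten::view", "aten::reshape")
print(rw.run_on_graph(graph))
```

As in the C++ pass, the rewrite preserves the dataflow (`%x`, `%1`, `%out`) and only renames the matched op.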

core/partitioning/partitioning.cpp

Lines changed: 1 addition & 1 deletion

@@ -176,7 +176,7 @@ void resolveNonTensorInputs(PartitionedGraph& segmented_blocks) { // , std::shar
   // if the segment that produce this nonTensor value is kTensorRT but consumed in kTorch, inject nodes in the first
   // kTorch segment.
   if (segmented_blocks[use_info.produce_id].target() == SegmentedBlock::kTensorRT && !use_info.torch_use_id.empty()) {
-    auto first_torch_id = use_info.torch_use_id.front();
+    auto first_torch_id = use_info.torch_use_id.back();
     if (!updated_segments.count(first_torch_id)) {
       // Segmented Blocks with non-tensor inputs will have to be re-segmented as
       // Torch-TensorRT doesn't support non-tensor inputs for a module.
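The one-character fix above changes which Torch segment receives the injected nodes: `front()` picked the first Torch segment consuming the non-tensor value, `back()` picks the last. A sketch of just that selection (segment ids are made up for illustration):

```python
# Torch segment ids that consume a non-tensor value produced by a
# TensorRT segment, in graph order (illustrative values).
torch_use_id = [3, 5, 8]

old_choice = torch_use_id[0]   # front(): first consuming Torch segment
new_choice = torch_use_id[-1]  # back():  last consuming Torch segment
print(old_choice, new_choice)  # -> 3 8
```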
(Remaining changed files are binary assets; contents not shown.)

0 commit comments