pytorch
diff --git a/.circleci/config.yml b/.circleci/config.yml
Lines changed: 67 additions & 27 deletions
diff --git a/WORKSPACE b/WORKSPACE
Lines changed: 4 additions & 4 deletions
diff --git a/core/conversion/converters/impl/batch_norm.cpp b/core/conversion/converters/impl/batch_norm.cpp
Lines changed: 18 additions & 5 deletions
diff --git a/core/conversion/converters/impl/select.cpp b/core/conversion/converters/impl/select.cpp
Lines changed: 23 additions & 0 deletions
diff --git a/core/conversion/converters/impl/unary.cpp b/core/conversion/converters/impl/unary.cpp
Lines changed: 15 additions & 0 deletions
diff --git a/core/conversion/converters/impl/unsqueeze.cpp b/core/conversion/converters/impl/unsqueeze.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/cpp/src/compile_spec.cpp b/cpp/src/compile_spec.cpp
Lines changed: 5 additions & 3 deletions
diff --git a/docs/_cpp_api/classtorch__tensorrt_1_1DataType.html b/docs/_cpp_api/classtorch__tensorrt_1_1DataType.html
Lines changed: 2 additions & 2 deletions
diff --git a/docs/_cpp_api/classtorch__tensorrt_1_1Device_1_1DeviceType.html b/docs/_cpp_api/classtorch__tensorrt_1_1Device_1_1DeviceType.html
Lines changed: 2 additions & 2 deletions
diff --git a/docs/_cpp_api/classtorch__tensorrt_1_1TensorFormat.html b/docs/_cpp_api/classtorch__tensorrt_1_1TensorFormat.html
Lines changed: 2 additions & 2 deletions
@@ -263,7 +263,7 @@ commands:
     parameters:
       torch-build:
         type: string
-        default: "2.0.0.dev20230129+cu117"
+        default: "2.0.0.dev20230219+cu117"
       torch-build-index:
         type: string
         default: "https://download.pytorch.org/whl/nightly/cu117"
@@ -463,7 +463,7 @@ commands:
       - run:
           name: Run core / C++ tests
           environment:
-            LD_LIBRARY_PATH: "/home/circleci/project/bazel-project/external/libtorch_pre_cxx11_abi/lib/:/home/circleci/project/bazel-project/external/tensorrt/lib/:/usr/local/cuda/lib64/:$LD_LIBRARY_PATH"
+            LD_LIBRARY_PATH: "/opt/circleci/.pyenv/versions/3.9.4/lib/python3.9/site-packages/torch_tensorrt.libs:/home/circleci/project/bazel-project/external/libtorch_pre_cxx11_abi/lib/:/home/circleci/project/bazel-project/external/tensorrt/lib/:/usr/local/cuda/lib64/:$LD_LIBRARY_PATH"
           command: |
             set -e
             mv toolchains/ci_workspaces/WORKSPACE.<< parameters.platform >> WORKSPACE
@@ -719,7 +719,7 @@ jobs:
           at: /tmp/dist
       - run:
           name: "Install torch-tensorrt"
-          command: pip3 install /tmp/dist/x86_64-linux/*
+          command: pip3 install /tmp/dist/x86_64-linux/*cp39-cp39*.whl
       - dump-test-env
       - test-ts-core
 
@@ -747,7 +747,7 @@ jobs:
           torch-build-index: << parameters.torch-build-index >>
       - run:
           name: "Install torch-tensorrt"
-          command: pip3 install --pre /tmp/dist/x86_64-linux/*
+          command: pip3 install --pre /tmp/dist/x86_64-linux/*cp39-cp39*.whl
       - dump-test-env
       - test-ts-py-api
 
@@ -777,7 +777,7 @@ jobs:
       #     command: export LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu/:$LD_LIBRARY_PATH
       - run:
           name: "Install torch-tensorrt"
-          command: pip3 install --pre /tmp/dist/x86_64-linux/*
+          command: pip3 install --pre /tmp/dist/x86_64-linux/*cp39-cp39*.whl
       # We install torch after torch-trt because pip automatically enforces the version constraint otherwise
       - dump-test-env
       - test-fx
@@ -791,9 +791,10 @@ jobs:
         type: string
       torch-build-index:
         type: string
+    parallelism: 4
     machine:
       image: ubuntu-2004-cuda-11.4:202110-01
-    resource_class: xlarge
+    resource_class: gpu.nvidia.small
     steps:
       - when:
           condition: << parameters.enabled >>
@@ -805,18 +806,58 @@ jobs:
                   cd ~/project/py/
                   docker build -t torch_tensorrt_release_env --build-arg trt_version=<< pipeline.parameters.trt-version-short >> -f ci/Dockerfile.ci .
             - run:
-                name: Build Python packages and pre-cxx11-abi tarball
+                name: Build Python packages
                 command: |
                   cd ~/project/py/
                   cp ~/project/toolchains/ci_workspaces/WORKSPACE.x86_64.release.rhel ~/project/WORKSPACE
-                  docker run -it --rm -v ~/project:/workspace/project torch_tensorrt_release_env /bin/bash /workspace/project/py/ci/build_whl.sh
+                  docker run -it --rm --gpus all -v ~/project:/workspace/project torch_tensorrt_release_env /bin/bash -c "source /workspace/project/py/ci/build_whl.sh && $(circleci tests split --total=4 ~/project/py/ci/build_manifest.txt)"
+            - run:
+                name: Collect packages
+                command: |
+                  mkdir -p /tmp/dist/x86_64-linux
+                  cp -r ~/project/py/wheelhouse/* /tmp/dist/x86_64-linux
+            - persist_to_workspace:
+                root: /tmp/dist
+                paths:
+                  - x86_64-linux
+            - store_artifacts:
+                path: /tmp/dist/x86_64-linux
+                destination: x86_64-linux
+      - unless:
+          condition: << parameters.enabled >>
+          steps:
+            - run:
+                name: Skipped packaging
+                command: echo -e "Packaging stage not enabled"
+
+  # TODO: Merge this with above
+  package-x86_64-linux-cxx11-abi:
+    parameters:
+      enabled:
+        type: boolean
+        default: false
+      torch-build:
+        type: string
+      torch-build-index:
+        type: string
+    machine:
+      image: ubuntu-2004-cuda-11.4:202110-01
+    resource_class: xlarge
+    steps:
+      - when:
+          condition: << parameters.enabled >>
+          steps:
+            - checkout
             - create-env:
                 os: "ubuntu2004"
                 platform: "x86_64"
                 cudnn-version: << pipeline.parameters.cudnn-version >>
                 trt-version-short:  << pipeline.parameters.trt-version-short >>
                 bazel-version: << pipeline.parameters.bazel-version >>
                 bazel-platform: "x86_64"
+            - install-torch-from-index:
+                torch-build:  << parameters.torch-build >>
+                torch-build-index: << parameters.torch-build-index >>
             - run:
                 name: Build cxx11-abi tarball
                 command: |
@@ -825,13 +866,16 @@ jobs:
                   cp ~/project/toolchains/ci_workspaces/WORKSPACE.x86_64.release.ubuntu ~/project/WORKSPACE
                   bazel build //:libtorchtrt -c opt --noshow_progress
                   sudo chown -R $(whoami) ~/project/py
-                  CUDA_VERSION=$(cd ~/project/py/torch_tensorrt && python3 -c "from _version import __cuda_version__;print(__cuda_version__)")
-                  TORCHTRT_VERSION=$(cd ~/project/py/torch_tensorrt && python3 -c "from _version import __version__;print(__version__)")
-                  TRT_VERSION=$(cd ~/project/py/torch_tensorrt && python3 -c "from _version import __tensorrt_version__;print(__tensorrt_version__)")
-                  CUDNN_VERSION=$(cd ~/project/py/torch_tensorrt && python3 -c "from _version import __cudnn_version__;print(__cudnn_version__)")
+                  cd ~/project/py
+                  CUDA_VERSION=$(python3 -c "from versions import __cuda_version__;print(__cuda_version__)")
+                  TORCHTRT_VERSION=$(python3 -c "from versions import __version__;print(__version__)")
+                  TRT_VERSION=$(python3 -c "from versions import __tensorrt_version__;print(__tensorrt_version__)")
+                  CUDNN_VERSION=$(python3 -c "from versions import __cudnn_version__;print(__cudnn_version__)")
+                  TORCH_VERSION=$(python3 -c "from torch import __version__;print(__version__.split('+')[0])")
                   pip3 install --upgrade pip
                   pip3 install -r ~/project/py/requirements.txt
                   TORCH_VERSION=$(python3 -c "from torch import __version__;print(__version__.split('+')[0])")
+                  mkdir -p ~/project/py/dist/
                   cp ~/project/bazel-bin/libtorchtrt.tar.gz ~/project/py/dist/libtorchtrt-${TORCHTRT_VERSION}-cudnn${CUDNN_VERSION}-tensorrt${TRT_VERSION}-cuda${CUDA_VERSION}-libtorch${TORCH_VERSION}-x86_64-linux.tar.gz
             - run:
                 name: Collect packages
@@ -1026,7 +1070,7 @@ parameters:
   # Nightly platform config
   torch-build:
     type: string
-    default: "2.0.0.dev20230129+cu117"
+    default: "2.0.0.dev20230219+cu117"
   torch-build-index:
     type: string
     default: "https://download.pytorch.org/whl/nightly/cu117"
@@ -1113,7 +1157,13 @@ workflows:
     when: << pipeline.parameters.enable-packaging >>
     jobs:
 
-      - build-x86_64-linux:
+      - package-x86_64-linux:
+          enabled: << pipeline.parameters.enable-packaging >>
+          torch-build: << pipeline.parameters.torch-build >>
+          torch-build-index: << pipeline.parameters.torch-build-index >>
+
+      - package-x86_64-linux-cxx11-abi:
+          enabled: << pipeline.parameters.enable-packaging >>
           torch-build: << pipeline.parameters.torch-build >>
           torch-build-index: << pipeline.parameters.torch-build-index >>
 
@@ -1124,31 +1174,21 @@ workflows:
           trt-version-long: << pipeline.parameters.trt-version-long >>
           cudnn-version: << pipeline.parameters.cudnn-version >>
           requires:
-            - build-x86_64-linux
+            - package-x86_64-linux
 
       - test-py-ts-x86_64-linux:
           torch-build: << pipeline.parameters.torch-build >>
           torch-build-index: << pipeline.parameters.torch-build-index >>
           trt-version-long: << pipeline.parameters.trt-version-long >>
           requires:
-            - build-x86_64-linux
+            - package-x86_64-linux
 
       - test-py-fx-x86_64-linux:
           torch-build: << pipeline.parameters.torch-build >>
           torch-build-index: << pipeline.parameters.torch-build-index >>
           trt-version-long: << pipeline.parameters.trt-version-long >>
           requires:
-            - build-x86_64-linux
-
-
-      - package-x86_64-linux:
-          enabled: << pipeline.parameters.enable-packaging >>
-          torch-build: << pipeline.parameters.torch-build >>
-          torch-build-index: << pipeline.parameters.torch-build-index >>
-          #requires:
-            #- test-core-cpp-x86_64-linux
-            #- test-py-ts-x86_64-linux
-            #- test-py-fx-x86_64-linux
+            - package-x86_64-linux
 
   on-push:
     jobs:
 
@@ -56,17 +56,17 @@ new_local_repository(
 http_archive(
     name = "libtorch",
     build_file = "@//third_party/libtorch:BUILD",
-    sha256 = "59b8b5e1954a86d50b79c13f06398d385b200da13e37a08ecf31d3c62e5ca127",
+    sha256 = "8b3b48615169c83c1b643c0efade078ea080b1da598e15fcf01bc59421f3095e",
     strip_prefix = "libtorch",
-    urls = ["https://download.pytorch.org/libtorch/nightly/cu117/libtorch-cxx11-abi-shared-with-deps-2.0.0.dev20230103%2Bcu117.zip"],
+    urls = ["https://download.pytorch.org/libtorch/nightly/cu117/libtorch-cxx11-abi-shared-with-deps-2.0.0.dev20230219%2Bcu117.zip"],
 )
 
 http_archive(
     name = "libtorch_pre_cxx11_abi",
     build_file = "@//third_party/libtorch:BUILD",
-    sha256 = "e260fc7476be89d1650953e8643e9f7363845f5a52de4bab87ac0e619c1f6ad4",
+    sha256 = "aa7fd06079d260ff83c344d043fb84fbd9cf831cf375ed8b5a1b62416817af31",
     strip_prefix = "libtorch",
-    urls = ["https://download.pytorch.org/libtorch/nightly/cu117/libtorch-shared-with-deps-2.0.0.dev20230103%2Bcu117.zip"],
+    urls = ["https://download.pytorch.org/libtorch/nightly/cu117/libtorch-shared-with-deps-2.0.0.dev20230219%2Bcu117.zip"],
 )
 
 # Download these tarballs manually from the NVIDIA website
 
@@ -20,15 +20,28 @@ void _batch_norm(
     const torch::Tensor& mean,
     const torch::Tensor& var,
     const float eps) {
-  auto scale = gamma / torch::sqrt(var + eps);
-  auto bias = beta - mean * scale;
+  auto orig_dtype = var.dtype();
+  // perform compile-time weight calculations in float to improve accuracy
+  // resulting weights will be embedded as the original dtype
+  auto calculation_gamma = gamma;
+  auto calculation_beta = beta;
+  auto calculation_mean = mean;
+  auto calculation_var = var;
+  if (orig_dtype == torch::kHalf) {
+    calculation_gamma = calculation_gamma.to(torch::kFloat);
+    calculation_beta = calculation_beta.to(torch::kFloat);
+    calculation_mean = calculation_mean.to(torch::kFloat);
+    calculation_var = calculation_var.to(torch::kFloat);
+  }
+  auto scale = calculation_gamma / torch::sqrt(calculation_var + eps);
+  auto bias = calculation_beta - calculation_mean * scale;
   LOG_DEBUG("_batch_norm Tensor Scale : " << scale.sizes());
   LOG_DEBUG("_batch_norm Tensor bias : " << bias.sizes());
 
-  auto scale_weights = Weights(ctx, scale);
-  auto bias_weights = Weights(ctx, bias);
+  auto scale_weights = Weights(ctx, scale.to(orig_dtype));
+  auto bias_weights = Weights(ctx, bias.to(orig_dtype));
 
-  auto power = Weights(ctx, at::ones_like(scale));
+  auto power = Weights(ctx, at::ones_like(scale).to(orig_dtype));
   auto bn =
       ctx->net->addScaleNd(*input, nvinfer1::ScaleMode::kCHANNEL, bias_weights.data, scale_weights.data, power.data, 1);
   bn->setName(util::node_info(n).c_str());
 
@@ -180,6 +180,29 @@ auto select_registrations TORCHTRT_UNUSED =
                return true;
              }})
         .pattern(
+            {"aten::index_select(Tensor self, int dim, Tensor index) -> Tensor",
+             [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+               auto in = args[0].ITensorOrFreeze(ctx);
+               auto maxDim = static_cast<int64_t>(in->getDimensions().nbDims);
+               auto dim = args[1].unwrapToInt();
+               // Normalize a negative dim by adding the number of dimensions (nbDims) of the input tensor
+               dim = dim < 0 ? dim + maxDim : dim;
+               auto index = args[2].ITensorOrFreeze(ctx);
+
+               LOG_DEBUG("Gather input dimensions: " << in->getDimensions());
+               LOG_DEBUG("Dimension to select: " << dim);
+               LOG_DEBUG("Index dimensions: " << index->getDimensions());
+
+               auto gather_layer = ctx->net->addGather(*in, *index, dim); // index_select is lowered to a gather layer on axis `dim`
+               TORCHTRT_CHECK(gather_layer, "Unable to create gather layer from node: " << *n);
+               auto out = gather_layer->getOutput(0);
+               LOG_DEBUG("Gather tensor shape: " << out->getDimensions());
+
+               out = ctx->AssociateValueAndTensor(n->outputs()[0], out);
+               LOG_DEBUG("Output tensor shape: " << out->getDimensions());
+               return true;
+             }})
+        .pattern(
             {"aten::narrow(Tensor(a) self, int dim, int start, int length) -> Tensor(a)",
              [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
                auto in = args[0].ITensor();
 
@@ -34,6 +34,21 @@ auto reciprocal_registration TORCHTRT_UNUSED = RegisterNodeConversionPatterns().
        return true;
      }});
 
+auto logical_not_registration TORCHTRT_UNUSED = RegisterNodeConversionPatterns().pattern(
+    {"aten::logical_not(Tensor self) -> Tensor", [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+       auto in = args[0].ITensorOrFreeze(ctx);
+       if (in->getType() != nvinfer1::DataType::kBOOL) {
+         // The kNOT unary layer only supports bool inputs, so cast any non-bool input to bool first
+         in = castITensor(ctx, in, nvinfer1::DataType::kBOOL, util::node_info(n).c_str());
+       }
+       auto unary_layer = ctx->net->addUnary(*in, nvinfer1::UnaryOperation::kNOT);
+       TORCHTRT_CHECK(unary_layer, "Unable to create logical_not layer from node: " << *n);
+       unary_layer->setName(util::node_info(n).c_str());
+       auto out_tensor = ctx->AssociateValueAndTensor(n->outputs()[0], unary_layer->getOutput(0));
+       LOG_DEBUG("Output tensor shape: " << out_tensor->getDimensions());
+       return true;
+     }});
+
 #define convert(unary, trt_type)                                                               \
   auto unary##_registrations TORCHTRT_UNUSED = RegisterNodeConversionPatterns().pattern(       \
       {"aten::" #unary "(Tensor self) -> Tensor",                                              \
 
@@ -32,7 +32,7 @@ auto unsqueeze_registrations TORCHTRT_UNUSED = RegisterNodeConversionPatterns().
 
        auto shuffle_layer = ctx->net->addShuffle(*self);
        TORCHTRT_CHECK(shuffle_layer, "Unable to create shuffle layer from node: " << *n);
-       shuffle_layer->setReshapeDimensions(util::unsqueezeDims(self->getDimensions(), dim));
+       shuffle_layer->setReshapeDimensions(util::unsqueezeDims(self->getDimensions(), dim, 1, false));
 
        auto out = ctx->AssociateValueAndTensor(n->outputs()[0], shuffle_layer->getOutput(0));
 
 
@@ -36,13 +36,15 @@ CompileSpec::CompileSpec(torch::jit::IValue input_signature) {
   graph_inputs.input_signature = input_signature;
 }
 
-void to_internal_input_signature(torch::jit::IValue input_ivalue, torch::jit::IValue& converted_ivalue) {
+void to_internal_input_signature(torch::jit::IValue input_ivalue, torch::jit::IValue& converted_ivalue, int depth = 0) {
+  TORCHTRT_CHECK(
+      depth <= 2, "Input nesting depth exceeds max supported depth, use 1 level: [A, B], or 2 level: [A, (B, C)]");
   if (input_ivalue.isTuple()) {
     auto input_tuple = input_ivalue.toTuple();
     std::vector<torch::jit::IValue> converted_elements;
     for (auto item : input_tuple->elements()) {
       torch::jit::IValue converted_item;
-      to_internal_input_signature(item, converted_item);
+      to_internal_input_signature(item, converted_item, depth + 1); // siblings share one level; depth++ would grow it per element
       converted_elements.push_back(converted_item);
       auto tuple_ptr = c10::ivalue::Tuple::create(converted_elements);
       converted_ivalue = torch::jit::IValue(tuple_ptr);
@@ -53,7 +55,7 @@ void to_internal_input_signature(torch::jit::IValue input_ivalue, torch::jit::IV
     auto converted_elements = c10::impl::GenericList(type);
     for (auto item : input_list) {
       torch::jit::IValue converted_item;
-      to_internal_input_signature(item, converted_item);
+      to_internal_input_signature(item, converted_item, depth + 1); // siblings share one level; depth++ would grow it per element
       converted_elements.push_back(converted_item);
     }
     converted_ivalue = torch::jit::IValue(converted_elements);
 
@@ -10,7 +10,7 @@
 
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
 
-  <title>Class DataType &mdash; Torch-TensorRT v1.4.0dev0+3d59933 documentation</title>
+  <title>Class DataType &mdash; Torch-TensorRT v1.4.0.dev0+b388010 documentation</title>
 
 
 
@@ -215,7 +215,7 @@
 
 
                 <div class="version">
-                  v1.4.0dev0+3d59933
+                  v1.4.0.dev0+b388010
                 </div>
 
 
 
@@ -10,7 +10,7 @@
 
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
 
-  <title>Class Device::DeviceType &mdash; Torch-TensorRT v1.4.0dev0+3d59933 documentation</title>
+  <title>Class Device::DeviceType &mdash; Torch-TensorRT v1.4.0.dev0+b388010 documentation</title>
 
 
 
@@ -215,7 +215,7 @@
 
 
                 <div class="version">
-                  v1.4.0dev0+3d59933
+                  v1.4.0.dev0+b388010
                 </div>
 
 
 
@@ -10,7 +10,7 @@
 
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
 
-  <title>Class TensorFormat &mdash; Torch-TensorRT v1.4.0dev0+3d59933 documentation</title>
+  <title>Class TensorFormat &mdash; Torch-TensorRT v1.4.0.dev0+b388010 documentation</title>
 
 
 
@@ -215,7 +215,7 @@
 
 
                 <div class="version">
-                  v1.4.0dev0+3d59933
+                  v1.4.0.dev0+b388010
                 </div>