Commit bcfbbec

pytorchbot committed

    2025-08-11 nightly release (40f1a12)

1 parent 7508be3

249 files changed: +832 -2617 lines changed

.github/workflows/build-test-linux-aarch64-jetpack.yml

Lines changed: 10 additions & 11 deletions

@@ -1,17 +1,16 @@
 name: Build and test Linux aarch64 wheels for Jetpack
 
 on:
-  # TODO: Uncomment this when we have a stable release
-  # pull_request:
-  # push:
-  #   branches:
-  #     - main
-  #     - nightly
-  #     - release/*
-  #   tags:
-  #     # NOTE: Binary build pipelines should only get triggered on release candidate builds
-  #     # Release candidate tags look like: v1.11.0-rc1
-  #     - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
+  pull_request:
+  push:
+    branches:
+      - main
+      - nightly
+      - release/*
+    tags:
+      # NOTE: Binary build pipelines should only get triggered on release candidate builds
+      # Release candidate tags look like: v1.11.0-rc1
+      - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
   workflow_dispatch:
 
 jobs:
.github/workflows/build_wheels_linux_aarch64.yml

Lines changed: 3 additions & 3 deletions

@@ -264,7 +264,7 @@ jobs:
           if [[ ${{ inputs.is-jetpack }} == false ]]; then
             ${CONDA_RUN} python setup.py bdist_wheel
           else
-            ${CONDA_RUN} python setup.py bdist_wheel --jetpack --plat-name=linux_tegra_aarch64
+            ${CONDA_RUN} python setup.py bdist_wheel --jetpack
           fi
       - name: Repair Manylinux_2_28 Wheel
         shell: bash -l {0}
@@ -337,8 +337,8 @@ jobs:
     needs: build
     name: upload-wheel-${{ matrix.python_version }}-${{ matrix.desired_cuda }}-${{ matrix.gpu_arch_type }}-${{ inputs.is-jetpack }}
     uses: pytorch/test-infra/.github/workflows/_binary_upload.yml@main
-    # for jetpack builds, only upload to pytorch index for nightly builds
-    if: ${{ inputs.is-jetpack == false || (github.event_name == 'push' && startsWith(github.event.ref, 'refs/heads/nightly')) }}
+    # for jetpack builds, do not upload to pytorch nightly index, only upload to https://pypi.jetson-ai-lab.io/ manually for each release
+    if: ${{ inputs.is-jetpack == false }}
     with:
       repository: ${{ inputs.repository }}
       ref: ${{ inputs.ref }}

MODULE.bazel

Lines changed: 1 addition & 2 deletions

@@ -90,10 +90,9 @@ http_archive(
 http_archive(
     name = "torch_l4t",
     build_file = "@//third_party/libtorch:BUILD",
-    sha256 = "6eff643c0a7acda92734cc798338f733ff35c7df1a4434576f5ff7c66fc97319",
     strip_prefix = "torch",
     type = "zip",
-    urls = ["https://pypi.jetson-ai-lab.dev/jp6/cu126/+f/6ef/f643c0a7acda9/torch-2.7.0-cp310-cp310-linux_aarch64.whl"],
+    urls = ["https://pypi.jetson-ai-lab.io/jp6/cu126/+f/62a/1beee9f2f1470/torch-2.8.0-cp310-cp310-linux_aarch64.whl"],
 )
 
 # Download these tarballs manually from the NVIDIA website

core/conversion/conversion.cpp

Lines changed: 0 additions & 6 deletions

@@ -202,13 +202,7 @@ void AddInputs(ConversionCtx* ctx, c10::ArrayRef<const torch::jit::Value*> input
   TORCHTRT_CHECK(
       profile->isValid(),
       "Optimization profile is invalid, please check the input range provided (conversion.AddInputs)");
-
   ctx->cfg->addOptimizationProfile(profile);
-#if NV_TENSORRT_MAJOR > 7 || (NV_TENSORRT_MAJOR == 7 && NV_TENSORRT_MINOR >= 1)
-  if (ctx->enabled_precisions.find(nvinfer1::DataType::kINT8) != ctx->enabled_precisions.end()) {
-    ctx->cfg->setCalibrationProfile(profile);
-  }
-#endif
 }
 
 void MarkOutputs(ConversionCtx* ctx, at::ArrayRef<const torch::jit::Value*> outputs) {
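
The deleted block was the converter's last use of TensorRT's calibration-profile API: with implicit quantization gone, an INT8 build only registers the optimization profile. A minimal before/after sketch against the stock nvinfer1 API (the cfg and profile arguments are illustrative, not this repo's exact wiring):

#include "NvInfer.h"

// Sketch: INT8 profile setup before vs. after this commit (illustrative).
void add_profile(nvinfer1::IBuilderConfig* cfg, nvinfer1::IOptimizationProfile* profile) {
  cfg->addOptimizationProfile(profile); // still done: ranges for dynamic shapes
  // Removed path (implicit quantization): the same profile also seeded the
  // calibrator's input shapes, and a calibrator supplied the INT8 scales.
  //   cfg->setCalibrationProfile(profile);
  //   cfg->setInt8Calibrator(calibrator);
  // Post-commit, scales are expected to come from Q/DQ nodes in the network.
}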

core/conversion/conversionctx/ConversionCtx.cpp

Lines changed: 2 additions & 11 deletions

@@ -31,8 +31,7 @@ std::ostream& operator<<(std::ostream& os, const BuilderSettings& s) {
   if (s.device.device_type == nvinfer1::DeviceType::kDLA) {
     os << "\n DLACore: " << s.device.dla_core;
   }
-  os << "\n Engine Capability: " << s.capability \
-     << "\n Calibrator Created: " << (s.calibrator != nullptr);
+  os << "\n Engine Capability: " << s.capability;
   return os;
 }
 // clang-format on
@@ -64,15 +63,7 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings)
       cfg->setFlag(nvinfer1::BuilderFlag::kFP16);
       break;
     case nvinfer1::DataType::kINT8:
-      TORCHTRT_CHECK(
-          builder->platformHasFastInt8(), "Requested inference in INT8 but platform does not support INT8");
-      cfg->setFlag(nvinfer1::BuilderFlag::kINT8);
-      if (!settings.calibrator) {
-        LOG_INFO(
-            "Int8 precision has been enabled but no calibrator provided. This assumes the network has Q/DQ nodes obtained from Quantization aware training. For more details, refer to https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#work-with-qat-networks");
-      } else {
-        cfg->setInt8Calibrator(settings.calibrator);
-      }
+      LOG_DEBUG("INT8 precision has been enabled, we assume the network has Q/DQ nodes obtained from modelopt");
      break;
     case nvinfer1::DataType::kFLOAT:
       break;
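
The rewritten kINT8 case encodes the new assumption: quantization scales live in the graph as explicit Q/DQ layers (e.g. inserted by TensorRT Model Optimizer), so the builder needs no calibrator. A hedged sketch of what explicit quantization looks like at the TensorRT network level (network, input, and scale are hypothetical placeholders):

#include "NvInfer.h"

// Sketch of explicit quantization: the scale is part of the graph itself,
// carried by IQuantizeLayer/IDequantizeLayer pairs, so enabling INT8 is just
// a builder flag with no IInt8Calibrator attached.
nvinfer1::ITensor* add_qdq(
    nvinfer1::INetworkDefinition* network,
    nvinfer1::ITensor* input,
    nvinfer1::ITensor* scale) {
  auto* q = network->addQuantize(*input, *scale); // FP32 -> INT8 using the given scale
  auto* dq = network->addDequantize(*q->getOutput(0), *scale); // INT8 -> FP32
  return dq->getOutput(0);
}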

core/conversion/conversionctx/ConversionCtx.h

Lines changed: 0 additions & 1 deletion

@@ -26,7 +26,6 @@ struct BuilderSettings {
   bool allow_shape_tensors = false;
   ir::Device device;
   nvinfer1::EngineCapability capability = TRT_ENGINE_CAPABILITY_STANDARD;
-  nvinfer1::IInt8Calibrator* calibrator = nullptr;
   uint64_t num_avg_timing_iters = 1;
   uint64_t workspace_size = 0;
   uint64_t dla_sram_size = DLA_SRAM_SIZE;

cpp/BUILD

Lines changed: 0 additions & 2 deletions

@@ -7,14 +7,12 @@ cc_library(
     srcs = [
         "src/compile_spec.cpp",
         "src/logging.cpp",
-        "src/ptq.cpp",
         "src/torch_tensorrt.cpp",
         "src/types.cpp",
     ],
     hdrs = [
         "include/torch_tensorrt/logging.h",
         "include/torch_tensorrt/macros.h",
-        "include/torch_tensorrt/ptq.h",
         "include/torch_tensorrt/torch_tensorrt.h",
     ],
     linkstatic = True,

cpp/CMakeLists.txt

Lines changed: 0 additions & 2 deletions

@@ -4,15 +4,13 @@ add_library(${lib_name} OBJECT)
 set(CXX_SRCS
     "${CMAKE_CURRENT_SOURCE_DIR}/src/compile_spec.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/src/logging.cpp"
-    "${CMAKE_CURRENT_SOURCE_DIR}/src/ptq.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/src/torch_tensorrt.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/src/types.cpp"
 )
 
 set(HEADER_FILES
     "${CMAKE_CURRENT_SOURCE_DIR}/include/torch_tensorrt/logging.h"
     "${CMAKE_CURRENT_SOURCE_DIR}/include/torch_tensorrt/macros.h"
-    "${CMAKE_CURRENT_SOURCE_DIR}/include/torch_tensorrt/ptq.h"
     "${CMAKE_CURRENT_SOURCE_DIR}/include/torch_tensorrt/torch_tensorrt.h"
 )
cpp/bin/torchtrtc/fileio.h

Lines changed: 0 additions & 1 deletion

@@ -23,7 +23,6 @@
 #include "torch/torch.h"
 
 #include "torch_tensorrt/logging.h"
-#include "torch_tensorrt/ptq.h"
 #include "torch_tensorrt/torch_tensorrt.h"
 
 namespace torchtrtc {

cpp/bin/torchtrtc/main.cpp

Lines changed: 3 additions & 10 deletions

@@ -7,7 +7,6 @@
 #include "torch/script.h"
 
 #include "torch_tensorrt/logging.h"
-#include "torch_tensorrt/ptq.h"
 #include "torch_tensorrt/torch_tensorrt.h"
 
 #include "accuracy.h"
@@ -335,8 +334,6 @@ int main(int argc, char** argv) {
     calibration_cache_file_path = torchtrtc::fileio::resolve_path(args::get(calibration_cache_file));
   }
 
-  auto calibrator = torchtrt::ptq::make_int8_cache_calibrator(calibration_cache_file_path);
-
   compile_settings.require_full_compilation = require_full_compilation;
 
   if (torch_executed_ops || torch_executed_mods) {
@@ -367,13 +364,9 @@ int main(int argc, char** argv) {
       compile_settings.enabled_precisions.insert(torch::kF16);
     } else if (dtype == torchtrt::DataType::kChar) {
       compile_settings.enabled_precisions.insert(torch::kI8);
-      if (calibration_cache_file) {
-        compile_settings.ptq_calibrator = calibrator;
-      } else {
-        torchtrt::logging::log(
-            torchtrt::logging::Level::kINFO,
-            "Int8 precision has been enabled but no calibrator provided. This assumes the network has Q/DQ nodes obtained from Quantization aware training. For more details, refer to https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#work-with-qat-networks");
-      }
+      torchtrt::logging::log(
+          torchtrt::logging::Level::kDEBUG,
+          "Int8 precision has been enabled which assumes the network has Q/DQ nodes obtained");
     } else {
       std::stringstream ss;
       ss << "Invalid precision given for enabled kernel precision, options are [ float | float32 | f32 | fp32 | half | float16 | f16 | fp16 | char | int8 | i8 ], found: ";
