diff --git a/.ci/docker/ci_commit_pins/pytorch.txt b/.ci/docker/ci_commit_pins/pytorch.txt index e3a53c8bcb5..a8de771a69d 100644 --- a/.ci/docker/ci_commit_pins/pytorch.txt +++ b/.ci/docker/ci_commit_pins/pytorch.txt @@ -1 +1 @@ -53a2908a10f414a2f85caa06703a26a40e873869 +e6f766c7d750d40603eee3f66c5915bac606b3ea diff --git a/.ci/scripts/utils.sh b/.ci/scripts/utils.sh index f896d3f1d40..8f48e75e712 100644 --- a/.ci/scripts/utils.sh +++ b/.ci/scripts/utils.sh @@ -44,6 +44,44 @@ install_pip_dependencies() { popd || return } +dedupe_macos_loader_path_rpaths() { + if [[ "$(uname)" != "Darwin" ]]; then + return + fi + + local torch_lib_dir + pushd .. + torch_lib_dir=$(python -c "import importlib.util; print(importlib.util.find_spec('torch').submodule_search_locations[0])")/lib + popd + + if [[ -z "${torch_lib_dir}" || ! -d "${torch_lib_dir}" ]]; then + return + fi + + local torch_libs=( + "libtorch_cpu.dylib" + "libtorch.dylib" + "libc10.dylib" + ) + + for lib_name in "${torch_libs[@]}"; do + local lib_path="${torch_lib_dir}/${lib_name}" + if [[ ! -f "${lib_path}" ]]; then + continue + fi + + local removed=0 + # Repeatedly remove the @loader_path rpath entries until none remain. + while install_name_tool -delete_rpath @loader_path "${lib_path}" 2>/dev/null; do + removed=1 + done + + if [[ "${removed}" == "1" ]]; then + install_name_tool -add_rpath @loader_path "${lib_path}" || true + fi + done +} + install_domains() { echo "Install torchvision and torchaudio" pip install --no-use-pep517 --user "git+https://github.com/pytorch/audio.git@${TORCHAUDIO_VERSION}" @@ -101,6 +139,7 @@ install_pytorch_and_domains() { echo "Use cached wheel at ${cached_torch_wheel}" fi + dedupe_macos_loader_path_rpaths # Grab the pinned audio and vision commits from PyTorch TORCHAUDIO_VERSION=$(cat .github/ci_commit_pins/audio.txt) export TORCHAUDIO_VERSION diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 8f0d8f6e571..c96b85740bc 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -351,6 +351,7 @@ jobs: # reinstall executorch bash ./install_executorch.sh --minimal + pip list # run python unittest python -m unittest examples.models.moshi.mimi.test_mimi diff --git a/CMakeLists.txt b/CMakeLists.txt index f5091a2af2e..1b96c12fbf3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -99,28 +99,6 @@ announce_configured_options(CCACHE_PROGRAM) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) -# Setup RPATH. See -# https://gitlab.kitware.com/cmake/community/-/wikis/doc/cmake/RPATH-handling -# Use separate rpaths during build and install phases -set(CMAKE_SKIP_BUILD_RPATH OFF) -# Don't use the install-rpath during the build phase -set(CMAKE_BUILD_WITH_INSTALL_RPATH ON) -# Automatically add all linked folders that are NOT in the build directory to -# the rpath (per library?) -# -# TODO: Doesn't work for us right now because we are not installing .so's into -# the correct locations. For example we have libcustom_ops_aot_lib.so depending -# on _portable_lib.so, which was eventually put under -# /executorch/extension/pybindings/ but this rpath is not -# automatically added because at build time it seems `portable_lib` is being -# built under the same directory, so no extra rpath is being added. To properly -# fix this we need to install `portable_lib` into the correct path. -set(CMAKE_INSTALL_RPATH_USE_LINK_PATH ON) -# ------------------------------ OPTIONS ------------------------------------- -# WARNING: Please don't add example specific options in this CMakeLists.txt. -# Instead please use `find_package(executorch REQUIRED)` in the example -# directory and add a new executable in the example `CMakeLists.txt`. - if(NOT EXECUTORCH_ENABLE_LOGGING) # Avoid pulling in the logging strings, which can be large. Note that this # will set the compiler flag for all targets in this directory, and for all @@ -909,12 +887,13 @@ if(EXECUTORCH_BUILD_PYBIND) # Set RPATH to find PyTorch libraries relative to the installation location # This goes from executorch/extension/pybindings up to site-packages, then to - # torch/lib + # torch/lib. Don't do this to APPLE, as it will error out on the following + # error: + # if(APPLE) - set_target_properties( - portable_lib PROPERTIES BUILD_RPATH "@loader_path/../../../torch/lib" - INSTALL_RPATH "@loader_path/../../../torch/lib" - ) + # Skip setting @loader_path for APPLE, since it causes error like ld: + # duplicate LC_RPATH '@loader_path' in '/torch/lib/ + # libtorch_cpu.dylib' else() set_target_properties( portable_lib PROPERTIES BUILD_RPATH "$ORIGIN/../../../torch/lib" diff --git a/backends/aoti/aoti_delegate_handle.h b/backends/aoti/aoti_delegate_handle.h index 2e72fc39821..82ce2521750 100644 --- a/backends/aoti/aoti_delegate_handle.h +++ b/backends/aoti/aoti_delegate_handle.h @@ -71,6 +71,11 @@ using AOTInductorModelContainerGetNumConstantsFunc = AOTIRuntimeError (*)( AOTInductorModelContainerHandle container_handle, size_t* num_constants); +// Update the model container with the constant tensors +using AOTInductorModelUpdateConstantsFromBlobFunc = AOTIRuntimeError (*)( + AOTInductorModelContainerHandle container_handle, + const uint8_t* weight_blob_ptr); + } // extern "C" // AOTI Delegate Handle structure @@ -87,6 +92,7 @@ struct AOTIDelegateHandle { AOTInductorModelContainerGetNumInputsFunc get_num_inputs; AOTInductorModelContainerGetNumOutputsFunc get_num_outputs; AOTInductorModelContainerRunFunc run; + AOTInductorModelUpdateConstantsFromBlobFunc update_constants_from_blob; }; } // namespace aoti diff --git a/backends/cuda/cuda_backend.py b/backends/cuda/cuda_backend.py index 05d01972833..ba6da92b991 100644 --- a/backends/cuda/cuda_backend.py +++ b/backends/cuda/cuda_backend.py @@ -146,8 +146,11 @@ def preprocess( "aot_inductor.embed_kernel_binary": True, # Do not link against the full PyTorch/libtorch library "aot_inductor.link_libtorch": False, - # Package model constants and other generated files directly in the shared object (.so) file - "aot_inductor.package_constants_in_so": True, + # Separate weight constants from the .so file + "aot_inductor.package": True, + "aot_inductor.package_constants_in_so": False, + # Store weight constants on disk in a binary blob + "aot_inductor.package_constants_on_disk_format": "binary_blob", # Enable maximum automatic tuning for optimal performance "max_autotune": True, # Use TRITON for GEMM (General Matrix Multiply) operations tuning only to avoid using operators in libtorch @@ -162,7 +165,8 @@ def preprocess( ] ), torch.no_grad(): # torch._logging.set_logs(post_grad_graphs=True) - so_path = torch._inductor.aot_compile(edge_program_module, tuple(user_input_placeholders), options=options) # type: ignore[arg-type] + # Here we should expect 1 so file and 1 weight blob in the same directory. + paths = torch._inductor.aot_compile(edge_program_module, tuple(user_input_placeholders), options=options) # type: ignore[arg-type] if len(missing_fallback_kernels) > 0: formatted_kernels = "\n - ".join(sorted(missing_fallback_kernels)) raise RuntimeError( @@ -170,17 +174,40 @@ def preprocess( "Please add them to the AOTI backend." ) + # Extract the .so and .blob paths from the returned list + so_path = None + blob_path = None + for path in paths: + if path.endswith(".wrapper.so"): + so_path = path + elif path.endswith(".wrapper_weights.blob"): + blob_path = path + + if so_path is None or blob_path is None: + raise RuntimeError( + f"Could not find required files in compiled paths, got {paths}" + ) + # pyre-ignorep[6]: Incompatible parameter type with open(so_path, "rb") as f: so_data = f.read() named_data_store = NamedDataStore() method_name = CudaBackend.method_name_from_compile_specs(compile_specs) + + # Keep the so file in the NamedDataStore, so that it can be packaged into the .pte file. + named_data_store.add_named_data(method_name + "_so_blob", so_data, 1, None) + + # Add weights blob to named data store + with open(blob_path, "rb") as f: + blob_data = f.read() named_data_store.add_named_data( - method_name + "_so_blob", so_data, 1, "aoti_cuda_blob" + method_name + "_weights_blob", blob_data, 1, "aoti_cuda_blob" ) + # Clean up the weights blob file + os.remove(blob_path) - # Clean up the generated so file; it has been packaged into the NamdeDataStore + # Clean up the generated so file; it has been packaged into the NamedDataStore # pyre-ignorep[6]: Incompatible parameter type os.remove(so_path) diff --git a/backends/cuda/runtime/cuda_backend.cpp b/backends/cuda/runtime/cuda_backend.cpp index e61b03ee8e6..0cef859ddfb 100644 --- a/backends/cuda/runtime/cuda_backend.cpp +++ b/backends/cuda/runtime/cuda_backend.cpp @@ -27,15 +27,6 @@ namespace executorch::backends::cuda { -#define LOAD_SYMBOL(handle, member, name, so_handle) \ - do { \ - auto symbol_res = get_function(so_handle, #name); \ - if (!symbol_res.ok()) { \ - return symbol_res.error(); \ - } \ - handle->member = reinterpret_cast(symbol_res.get()); \ - } while (0) - using namespace std; using namespace aoti; @@ -61,29 +52,37 @@ class ET_EXPERIMENTAL CudaBackend final Error load_function_pointers_into_handle( void* so_handle, AOTIDelegateHandle* handle) const { - LOAD_SYMBOL( - handle, - create_with_device, - AOTInductorModelContainerCreateWithDevice, - so_handle); +#define LOAD_SYMBOL(member, name) \ + do { \ + auto symbol_res = get_function(so_handle, #name); \ + if (!symbol_res.ok()) { \ + return symbol_res.error(); \ + } \ + handle->member = reinterpret_cast(symbol_res.get()); \ + } while (0) + + LOAD_SYMBOL(create_with_device, AOTInductorModelContainerCreateWithDevice); - LOAD_SYMBOL( - handle, delete_container, AOTInductorModelContainerDelete, so_handle); + LOAD_SYMBOL(delete_container, AOTInductorModelContainerDelete); - LOAD_SYMBOL( - handle, - get_num_inputs, - AOTInductorModelContainerGetNumInputs, - so_handle); + LOAD_SYMBOL(get_num_inputs, AOTInductorModelContainerGetNumInputs); - LOAD_SYMBOL( - handle, - get_num_outputs, - AOTInductorModelContainerGetNumOutputs, - so_handle); + LOAD_SYMBOL(get_num_outputs, AOTInductorModelContainerGetNumOutputs); - LOAD_SYMBOL(handle, run, AOTInductorModelContainerRun, so_handle); + LOAD_SYMBOL(run, AOTInductorModelContainerRun); +#undef LOAD_SYMBOL + auto symbol_res = + get_function(so_handle, "AOTInductorModelUpdateConstantsFromBlob"); + if (symbol_res.ok()) { + handle->update_constants_from_blob = + reinterpret_cast( + symbol_res.get()); + } else { + ET_LOG( + Info, + "Failed to load AOTInductorModelUpdateConstantsFromBlob. This .so is probably compiled on an old version of torch (<2.9.0)"); + } return Error::Ok; } @@ -112,13 +111,13 @@ class ET_EXPERIMENTAL CudaBackend final method_name.empty() ? "so_blob" : method_name + "_so_blob"; const NamedDataMap* named_data_map = context.get_named_data_map(); - auto aoti_cuda_buffer = named_data_map->get_data(so_blob_key.c_str()); + auto aoti_dso_buffer = named_data_map->get_data(so_blob_key.c_str()); ET_CHECK_OR_RETURN_ERROR( - aoti_cuda_buffer.ok(), + aoti_dso_buffer.ok(), Internal, "Failed to get data for key %s: 0x%x", so_blob_key.c_str(), - static_cast(aoti_cuda_buffer.error())); + static_cast(aoti_dso_buffer.error())); // Generate dynamic temporary file path filesystem::path temp_dir = filesystem::temp_directory_path(); @@ -132,12 +131,12 @@ class ET_EXPERIMENTAL CudaBackend final ET_LOG( Info, "Writing %zu bytes to %s", - aoti_cuda_buffer->size(), + aoti_dso_buffer->size(), so_path.c_str()); outfile.write( - static_cast(aoti_cuda_buffer->data()), - aoti_cuda_buffer->size()); + static_cast(aoti_dso_buffer->data()), + aoti_dso_buffer->size()); ET_CHECK_OR_RETURN_ERROR( outfile, AccessFailed, "Failed to write to file %s", so_path.c_str()); @@ -145,6 +144,8 @@ class ET_EXPERIMENTAL CudaBackend final // Finish writing the file to disk outfile.close(); + // Free the buffer immediately after writing to disk + aoti_dso_buffer->Free(); // Load the lib Result lib_handle_res = load_library(so_path); if (!lib_handle_res.ok()) { @@ -172,6 +173,19 @@ class ET_EXPERIMENTAL CudaBackend final handle->container_handle = container_handle; + // Look into named data map for constant data + std::string weights_blob_key = + method_name.empty() ? "weights_blob" : method_name + "_weights_blob"; + auto buffer_res = named_data_map->get_data(weights_blob_key.c_str()); + if (buffer_res.ok() && handle->update_constants_from_blob != nullptr) { + ET_LOG(Info, "Found %s in named data map", weights_blob_key.c_str()); + const void* weights_blob = buffer_res->data(); + // Feed the weights blob into the container. Under the hood it's copying + // weights, so we should free the buffer immediately. + ET_CHECK_OK_OR_RETURN_ERROR(handle->update_constants_from_blob( + handle->container_handle, static_cast(weights_blob))); + buffer_res->Free(); + } // Create a CUDA stream for asynchronous execution cudaStream_t cuda_stream; ET_CUDA_CHECK_OR_RETURN_ERROR(cudaStreamCreate(&cuda_stream)); diff --git a/examples/models/moshi/mimi/install_requirements.sh b/examples/models/moshi/mimi/install_requirements.sh index 6df4caf8692..bddd960f8a7 100755 --- a/examples/models/moshi/mimi/install_requirements.sh +++ b/examples/models/moshi/mimi/install_requirements.sh @@ -8,9 +8,9 @@ set -x conda install -c conda-forge "ffmpeg<8" -y -pip install torchcodec==0.7.0.dev20250929 --extra-index-url https://download.pytorch.org/whl/nightly/cpu -pip install moshi==0.2.4 -pip install bitsandbytes soundfile +pip install torchcodec==0.7.0.dev20251012 --extra-index-url https://download.pytorch.org/whl/nightly/cpu +pip install moshi==0.2.11 +pip install bitsandbytes soundfile einops # Run llama2/install requirements for torchao deps SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) bash "$SCRIPT_DIR"/../../llama/install_requirements.sh diff --git a/examples/models/moshi/mimi/test_mimi.py b/examples/models/moshi/mimi/test_mimi.py index d0c3c2ceb15..93513c54e78 100644 --- a/examples/models/moshi/mimi/test_mimi.py +++ b/examples/models/moshi/mimi/test_mimi.py @@ -189,8 +189,7 @@ def forward(self, x): x = self.mimi_model.upsample(x) (emb,) = self.mimi_model.decoder_transformer(x) emb.transpose(1, 2) - with self.mimi_model._context_for_encoder_decoder: - out = self.mimi_model.decoder(emb) + out = self.mimi_model.decoder(emb) return out emb_input = torch.rand(1, 1, 512, device="cpu") diff --git a/examples/models/voxtral/multimodal.cpp b/examples/models/voxtral/multimodal.cpp index b3dd5e3ab68..29edf955751 100644 --- a/examples/models/voxtral/multimodal.cpp +++ b/examples/models/voxtral/multimodal.cpp @@ -319,7 +319,7 @@ int32_t main(int32_t argc, char** argv) { // Create multimodal runner std::unique_ptr<::executorch::extension::llm::MultimodalRunner> runner = ::executorch::extension::llm::create_multimodal_runner( - model_path, std::move(tokenizer), data_path); + model_path, std::move(tokenizer), data_path, Module::LoadMode::Mmap); if (runner == nullptr) { ET_LOG(Error, "Failed to create multimodal runner"); return 1; diff --git a/extension/llm/runner/llm_runner_helper.cpp b/extension/llm/runner/llm_runner_helper.cpp index d1e4ff2ce45..674be820072 100644 --- a/extension/llm/runner/llm_runner_helper.cpp +++ b/extension/llm/runner/llm_runner_helper.cpp @@ -268,7 +268,8 @@ std::unique_ptr create_text_llm_runner( std::unique_ptr create_multimodal_runner( const std::string& model_path, std::unique_ptr<::tokenizers::Tokenizer> tokenizer, - std::optional data_path) { + std::optional data_path, + Module::LoadMode load_mode) { // Sanity check tokenizer if (!tokenizer || !tokenizer->is_loaded()) { ET_LOG(Error, "Tokenizer is null or not loaded"); @@ -278,10 +279,9 @@ std::unique_ptr create_multimodal_runner( // Create the Module std::unique_ptr module; if (data_path.has_value()) { - module = std::make_unique( - model_path, data_path.value(), Module::LoadMode::File); + module = std::make_unique(model_path, data_path.value(), load_mode); } else { - module = std::make_unique(model_path, Module::LoadMode::File); + module = std::make_unique(model_path, load_mode); } // Get metadata from Module diff --git a/extension/llm/runner/llm_runner_helper.h b/extension/llm/runner/llm_runner_helper.h index 5c109581e19..08f0efd0353 100644 --- a/extension/llm/runner/llm_runner_helper.h +++ b/extension/llm/runner/llm_runner_helper.h @@ -140,6 +140,7 @@ ET_EXPERIMENTAL std::unique_ptr create_text_llm_runner( ET_EXPERIMENTAL std::unique_ptr create_multimodal_runner( const std::string& model_path, std::unique_ptr<::tokenizers::Tokenizer> tokenizer, - std::optional data_path = std::nullopt); + std::optional data_path = std::nullopt, + Module::LoadMode load_mode = Module::LoadMode::File); } // namespace executorch::extension::llm diff --git a/runtime/core/portable_type/c10/c10/util/llvmMathExtras.h b/runtime/core/portable_type/c10/c10/util/llvmMathExtras.h index 556699be04b..6321297a61c 100644 --- a/runtime/core/portable_type/c10/c10/util/llvmMathExtras.h +++ b/runtime/core/portable_type/c10/c10/util/llvmMathExtras.h @@ -70,7 +70,7 @@ enum ZeroBehavior { namespace detail { template struct TrailingZerosCounter { - static std::size_t count(T Val, ZeroBehavior) { + static std::size_t count(T Val, ZeroBehavior /*unused*/) { if (!Val) return std::numeric_limits::digits; if (Val & 0x1) @@ -147,7 +147,7 @@ std::size_t countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) { namespace detail { template struct LeadingZerosCounter { - static std::size_t count(T Val, ZeroBehavior) { + static std::size_t count(T Val, ZeroBehavior /*unused*/) { if (!Val) return std::numeric_limits::digits; diff --git a/runtime/core/portable_type/c10/torch/headeronly/macros/Macros.h b/runtime/core/portable_type/c10/torch/headeronly/macros/Macros.h index 558edb175ae..e340e7626a0 100644 --- a/runtime/core/portable_type/c10/torch/headeronly/macros/Macros.h +++ b/runtime/core/portable_type/c10/torch/headeronly/macros/Macros.h @@ -359,6 +359,7 @@ static inline int C10_WARP_SIZE_INTERNAL() { // Those platforms do not support assert() #define CUDA_KERNEL_ASSERT(cond) #define CUDA_KERNEL_ASSERT_MSG(cond, msg) +#define CUDA_KERNEL_ASSERT_PRINTF(cond, msg, ...) #define SYCL_KERNEL_ASSERT(cond) #elif defined(_MSC_VER) #if defined(NDEBUG) @@ -396,6 +397,26 @@ __host__ __device__ static_cast(__LINE__)), \ 0); \ } +#define CUDA_KERNEL_ASSERT_PRINTF(cond, msg, ...) \ + if (C10_UNLIKELY(!(cond))) { \ + (void)(printf( \ + "[CUDA_KERNEL_ASSERT] " __FILE__ ":" C10_STRINGIZE( \ + __LINE__) ": %s: block: [%d,%d,%d], thread: [%d,%d,%d]: " \ + "Assertion failed: `" #cond "`: " msg "\n", \ + __func__, \ + blockIdx.x, \ + blockIdx.y, \ + blockIdx.z, \ + threadIdx.x, \ + threadIdx.y, \ + threadIdx.z, \ + ##__VA_ARGS__)); \ + (void)(_wassert( \ + _CRT_WIDE(#cond), \ + _CRT_WIDE(__FILE__), \ + static_cast(__LINE__)), \ + 0); \ + } #define SYCL_KERNEL_ASSERT(cond) \ if (C10_UNLIKELY(!(cond))) { \ (void)(_wassert( \ @@ -455,6 +476,10 @@ __host__ __device__ if C10_UNLIKELY (!(cond)) { \ abort(); \ } +#define CUDA_KERNEL_ASSERT_PRINTF(cond, msg, ...) \ + if C10_UNLIKELY (!(cond)) { \ + abort(); \ + } #define SYCL_KERNEL_ASSERT(cond) \ if C10_UNLIKELY (!(cond)) { \ abort(); \ @@ -470,6 +495,23 @@ __host__ __device__ __assert_fail( \ msg, __FILE__, static_cast(__LINE__), __func__); \ } +#define CUDA_KERNEL_ASSERT_PRINTF(cond, msg, ...) \ + if (C10_UNLIKELY(!(cond))) { \ + printf( \ + "[CUDA_KERNEL_ASSERT] " __FILE__ ":" C10_STRINGIZE( \ + __LINE__) ": %s: block: [%d,%d,%d], thread: [%d,%d,%d]: " \ + "Assertion failed: `" #cond "`: " msg "\n", \ + __func__, \ + blockIdx.x, \ + blockIdx.y, \ + blockIdx.z, \ + threadIdx.x, \ + threadIdx.y, \ + threadIdx.z, \ + ##__VA_ARGS__); \ + __assert_fail( \ + #cond, __FILE__, static_cast(__LINE__), __func__); \ + } #define SYCL_KERNEL_ASSERT(cond) \ if (C10_UNLIKELY(!(cond))) { \ __assert_fail( \ diff --git a/runtime/core/portable_type/c10/torch/headeronly/util/BFloat16.h b/runtime/core/portable_type/c10/torch/headeronly/util/BFloat16.h index 2c1f805ac7b..ac47e3f844a 100644 --- a/runtime/core/portable_type/c10/torch/headeronly/util/BFloat16.h +++ b/runtime/core/portable_type/c10/torch/headeronly/util/BFloat16.h @@ -39,7 +39,9 @@ struct alignas(2) BFloat16 { return from_bits_t(); } - constexpr C10_HOST_DEVICE BFloat16(unsigned short bits, from_bits_t) + constexpr C10_HOST_DEVICE BFloat16( + unsigned short bits, + from_bits_t /*unused*/) : x(bits) {} /* implicit */ inline C10_HOST_DEVICE BFloat16(float value); inline C10_HOST_DEVICE operator float() const; diff --git a/runtime/core/portable_type/c10/torch/headeronly/util/Half.h b/runtime/core/portable_type/c10/torch/headeronly/util/Half.h index 59a86f07e33..9673301e2de 100644 --- a/runtime/core/portable_type/c10/torch/headeronly/util/Half.h +++ b/runtime/core/portable_type/c10/torch/headeronly/util/Half.h @@ -80,7 +80,8 @@ struct alignas(2) Half { Half() = default; #endif - constexpr C10_HOST_DEVICE Half(unsigned short bits, from_bits_t) : x(bits) {} + constexpr C10_HOST_DEVICE Half(unsigned short bits, from_bits_t /*unused*/) + : x(bits) {} #if defined(__aarch64__) && !defined(__CUDACC__) inline Half(float16_t value); inline operator float16_t() const; diff --git a/torch_pin.py b/torch_pin.py index 02040c91963..5e54c848d13 100644 --- a/torch_pin.py +++ b/torch_pin.py @@ -1,2 +1,2 @@ TORCH_VERSION = "2.10.0" -NIGHTLY_VERSION = "dev20251003" +NIGHTLY_VERSION = "dev20251015"