From 2a50e756a056c12a5177360c6f3d3ce5b7f3ccd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dan=20To=C5=A1kan?= Date: Tue, 4 Nov 2025 09:29:27 +0100 Subject: [PATCH 1/7] Added assertion if index operation is out of range to prevent memory corruption --- cpp/open3d/core/Indexer.h | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/cpp/open3d/core/Indexer.h b/cpp/open3d/core/Indexer.h index 76f99acba5d..0ca6bdf41b7 100644 --- a/cpp/open3d/core/Indexer.h +++ b/cpp/open3d/core/Indexer.h @@ -106,7 +106,11 @@ struct TensorRef { // The default copy constructor works on __device__ as well so we don't // define it explicitly. shape_[MAX_DIMS] and strides[MAX_DIMS] will be // copied fully. - TensorRef() : data_ptr_(nullptr), ndims_(0), dtype_byte_size_(0) {} + TensorRef() + : data_ptr_(nullptr), + ndims_(0), + dtype_byte_size_(0), + total_byte_size_(0) {} TensorRef(const Tensor& t) { if (t.NumDims() > MAX_DIMS) { @@ -116,6 +120,7 @@ struct TensorRef { data_ptr_ = const_cast(t.GetDataPtr()); ndims_ = t.NumDims(); dtype_byte_size_ = t.GetDtype().ByteSize(); + total_byte_size_ = t.NumElements() * dtype_byte_size_; for (int64_t i = 0; i < ndims_; ++i) { shape_[i] = t.GetShape(i); byte_strides_[i] = t.GetStride(i) * dtype_byte_size_; @@ -192,6 +197,7 @@ struct TensorRef { void* data_ptr_; int64_t ndims_ = 0; int64_t dtype_byte_size_ = 0; + int64_t total_byte_size_ = 0; int64_t shape_[MAX_DIMS]; int64_t byte_strides_[MAX_DIMS]; }; @@ -546,18 +552,21 @@ class Indexer { if (workload_idx < 0) { return nullptr; } + + int64_t offset = 0; if (tr_contiguous) { - return static_cast(tr.data_ptr_) + - workload_idx * tr.dtype_byte_size_; + offset = workload_idx * tr.dtype_byte_size_; } else { - int64_t offset = 0; for (int64_t i = 0; i < ndims_; ++i) { offset += workload_idx / primary_strides_[i] * tr.byte_strides_[i]; workload_idx = workload_idx % primary_strides_[i]; } - return static_cast(tr.data_ptr_) + offset; } + + OPEN3D_ASSERT(offset >= 0 && offset < tr.total_byte_size_ && + "Index operation data pointer is out of range."); + return static_cast(tr.data_ptr_) + offset; } /// Get data pointer from a TensorRef with \p workload_idx. @@ -638,7 +647,7 @@ class Indexer { class IndexerIterator { public: struct Iterator { - Iterator() {}; + Iterator(){}; Iterator(const Indexer& indexer); Iterator(Iterator&& other) = default; From 873537d8d4319ec4ca4da96f3cf316703729f6da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dan=20To=C5=A1kan?= Date: Tue, 4 Nov 2025 15:12:13 +0100 Subject: [PATCH 2/7] Added tests and another guard for CUDA cases --- cpp/open3d/core/Indexer.h | 4 ++++ cpp/tests/core/TensorCheck.cpp | 36 ++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/cpp/open3d/core/Indexer.h b/cpp/open3d/core/Indexer.h index 0ca6bdf41b7..dd405ad5677 100644 --- a/cpp/open3d/core/Indexer.h +++ b/cpp/open3d/core/Indexer.h @@ -564,8 +564,12 @@ class Indexer { } } +#if defined(__CUDACC__) + assert(offset >= 0 && offset < tr.total_byte_size_); +#else OPEN3D_ASSERT(offset >= 0 && offset < tr.total_byte_size_ && "Index operation data pointer is out of range."); +#endif return static_cast(tr.data_ptr_) + offset; } diff --git a/cpp/tests/core/TensorCheck.cpp b/cpp/tests/core/TensorCheck.cpp index 14fb471d874..2f8113b9af5 100644 --- a/cpp/tests/core/TensorCheck.cpp +++ b/cpp/tests/core/TensorCheck.cpp @@ -7,6 +7,7 @@ #include "open3d/core/TensorCheck.h" +#include "open3d/core/CUDAUtils.h" #include "open3d/utility/Helper.h" #include "tests/Tests.h" #include "tests/core/CoreTest.h" @@ -201,5 +202,40 @@ TEST_P(TensorCheckPermuteDevices, AssertTensorShape) { } } +#if BUILD_CUDA_MODULE +class TensorCheckPermuteDevicesDeathTest : public TensorCheckPermuteDevices {}; +INSTANTIATE_TEST_SUITE_P( + Tensor, + TensorCheckPermuteDevicesDeathTest, + testing::ValuesIn(TensorCheckPermuteDevicesDeathTest::TestCases())); + +TEST_P(TensorCheckPermuteDevicesDeathTest, AssertTensorIndexOps) { + GTEST_FLAG_SET(death_test_style, "threadsafe"); + core::Device device = GetParam(); + core::Tensor idx = core::Tensor::Init( + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, device); + core::Tensor t = core::Tensor::Zeros({10}, core::Float32, device); + core::Tensor val = + core::Tensor::Ones({idx.GetLength()}, core::Float32, device); + if (device.IsCUDA()) { + // for CUDA, this is the best we can do + try { + t.IndexAdd_(0, idx, val); + core::cuda::Synchronize(); + core::OPEN3D_GET_LAST_CUDA_ERROR("Index operation failed"); + FAIL() << "Should not reach here."; + } catch (std::runtime_error const& err) { + EXPECT_TRUE(utility::ContainsString( + err.what(), + "CUDA runtime error: device-side assert triggered")); + } catch (...) { + FAIL() << "std::runtime_error not thrown."; + } + } else { + EXPECT_DEATH(t.IndexAdd_(0, idx, val), + "Index operation data pointer is out of range."); + } +} +#endif } // namespace tests } // namespace open3d From 41182581891fe9c56deb3bedfd9af0b0c8e32c29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dan=20To=C5=A1kan?= <54912695+TwentyPast4@users.noreply.github.com> Date: Tue, 4 Nov 2025 17:41:24 +0100 Subject: [PATCH 3/7] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index eb748ba4cc5..6a4e8c95ddb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -66,6 +66,7 @@ - Fix advanced indexing bug with sliced boolean masks on CUDA devices (PR #7340) - Fix logic for adding -allow-unsupported-compiler to nvcc (PR #7337) - Fix linker error "library limit of 65535 objects exceeded" with Ninja generator on MSVC (PR #7335) +- Add assertion guard for invalid indexing operations (PR #7360) ## 0.13 From 9d51a1a271b4856c756d8873fb525f094b20a346 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dan=20To=C5=A1kan?= Date: Wed, 5 Nov 2025 13:25:05 +0100 Subject: [PATCH 4/7] Added guard for index iterators as well --- cpp/open3d/core/Indexer.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/cpp/open3d/core/Indexer.h b/cpp/open3d/core/Indexer.h index dd405ad5677..4dca7baa5ce 100644 --- a/cpp/open3d/core/Indexer.h +++ b/cpp/open3d/core/Indexer.h @@ -180,6 +180,7 @@ struct TensorRef { rc = rc && (data_ptr_ == other.data_ptr_); rc = rc && (ndims_ == other.ndims_); rc = rc && (dtype_byte_size_ == other.dtype_byte_size_); + rc = rc && (total_byte_size_ == other.total_byte_size_); for (int64_t i = 0; i < ndims_; ++i) { rc = rc && (shape_[i] == other.shape_[i]); rc = rc && (byte_strides_[i] == other.byte_strides_[i]); @@ -248,6 +249,14 @@ class TensorIterator { input_.byte_strides_[i]; workload_idx = workload_idx % input_.byte_strides_[i]; } + +#if defined(__CUDACC__) + assert(offset >= 0 && offset < input_.total_byte_size_); +#else + OPEN3D_ASSERT(offset >= 0 && offset < input_.total_byte_size_ && + "TensorIterator operation data pointer is out of range."); +#endif + return static_cast(static_cast(input_.data_ptr_) + offset); } From 8534bc1495b8261ad922d3822a052cb5bbef3d80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dan=20To=C5=A1kan?= Date: Wed, 5 Nov 2025 13:37:03 +0100 Subject: [PATCH 5/7] Fixed templated indexer access method not using the same path as the non-templated one. --- cpp/open3d/core/Indexer.h | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/cpp/open3d/core/Indexer.h b/cpp/open3d/core/Indexer.h index 4dca7baa5ce..afdd2b13ce4 100644 --- a/cpp/open3d/core/Indexer.h +++ b/cpp/open3d/core/Indexer.h @@ -592,23 +592,11 @@ class Indexer { OPEN3D_HOST_DEVICE T* GetWorkloadDataPtr(const TensorRef& tr, bool tr_contiguous, int64_t workload_idx) const { - // For 0-sized input reduction op, the output Tensor - // workload_idx == 1 > NumWorkloads() == 0. - if (workload_idx < 0) { - return nullptr; - } - if (tr_contiguous) { - return static_cast(tr.data_ptr_) + workload_idx; - } else { - int64_t offset = 0; - for (int64_t i = 0; i < ndims_; ++i) { - offset += workload_idx / primary_strides_[i] * - tr.byte_strides_[i]; - workload_idx = workload_idx % primary_strides_[i]; - } - return static_cast(static_cast( - static_cast(tr.data_ptr_) + offset)); - } + // See note of this function. + // If sizeof(T) == tr.dtype_byte_size_, then we can just static cast the + // byte pointer. + return static_cast(static_cast( + GetWorkloadDataPtr(tr, tr_contiguous, workload_idx))); } /// Number of input and output Tensors. From 1db91f2d9312bab9cda5bfe518e8597ac90ac265 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dan=20To=C5=A1kan?= Date: Wed, 5 Nov 2025 14:01:44 +0100 Subject: [PATCH 6/7] Generalized buffer byte size limit calculation for non-contiguous cases --- cpp/open3d/core/Indexer.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/cpp/open3d/core/Indexer.h b/cpp/open3d/core/Indexer.h index afdd2b13ce4..8ca19be0072 100644 --- a/cpp/open3d/core/Indexer.h +++ b/cpp/open3d/core/Indexer.h @@ -106,11 +106,7 @@ struct TensorRef { // The default copy constructor works on __device__ as well so we don't // define it explicitly. shape_[MAX_DIMS] and strides[MAX_DIMS] will be // copied fully. - TensorRef() - : data_ptr_(nullptr), - ndims_(0), - dtype_byte_size_(0), - total_byte_size_(0) {} + TensorRef() : data_ptr_(nullptr) {} TensorRef(const Tensor& t) { if (t.NumDims() > MAX_DIMS) { @@ -120,10 +116,17 @@ struct TensorRef { data_ptr_ = const_cast(t.GetDataPtr()); ndims_ = t.NumDims(); dtype_byte_size_ = t.GetDtype().ByteSize(); - total_byte_size_ = t.NumElements() * dtype_byte_size_; + total_byte_size_ = 0; for (int64_t i = 0; i < ndims_; ++i) { shape_[i] = t.GetShape(i); byte_strides_[i] = t.GetStride(i) * dtype_byte_size_; + // The end of the buffer should be at the end of the largest strided + // dimension block This way, we can compute the total buffer size in + // both cases (when tensor is contiguous and when it is not) If it + // is not contiguous, the actual "end" of the buffer may not be + // simply NumElements() * dtype_byte_size_ + total_byte_size_ = + std::max(total_byte_size_, shape_[i] * byte_strides_[i]); } } From 5d341f6ae815ba45d055fc9c54df9514b6a22de0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dan=20To=C5=A1kan?= Date: Wed, 5 Nov 2025 15:46:44 +0100 Subject: [PATCH 7/7] Tweaked assert for CUDA cases --- cpp/open3d/core/AdvancedIndexing.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cpp/open3d/core/AdvancedIndexing.h b/cpp/open3d/core/AdvancedIndexing.h index ab69dcac8ea..673a682d7a7 100644 --- a/cpp/open3d/core/AdvancedIndexing.h +++ b/cpp/open3d/core/AdvancedIndexing.h @@ -183,8 +183,12 @@ class AdvancedIndexer { for (int64_t i = 0; i < num_indices_; ++i) { int64_t index = *(reinterpret_cast( indexer_.GetInputPtr(i + 1, workload_idx))); +#if defined(__CUDACC__) + assert(index >= -indexed_shape_[i] && index < indexed_shape_[i] && "Index out of bounds."); +#else OPEN3D_ASSERT(index >= -indexed_shape_[i] && index < indexed_shape_[i] && "Index out of bounds."); +#endif index += indexed_shape_[i] * (index < 0); offset += index * indexed_strides_[i]; }