diff --git a/CHANGELOG.md b/CHANGELOG.md
index eb748ba4cc5..6a4e8c95ddb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -66,6 +66,7 @@
 - Fix advanced indexing bug with sliced boolean masks on CUDA devices (PR #7340)
 - Fix logic for adding -allow-unsupported-compiler to nvcc (PR #7337)
 - Fix linker error "library limit of 65535 objects exceeded" with Ninja generator on MSVC (PR #7335)
+- Add assertion guard for invalid indexing operations (PR #7360)
 
 ## 0.13
 
diff --git a/cpp/open3d/core/AdvancedIndexing.h b/cpp/open3d/core/AdvancedIndexing.h
index ab69dcac8ea..673a682d7a7 100644
--- a/cpp/open3d/core/AdvancedIndexing.h
+++ b/cpp/open3d/core/AdvancedIndexing.h
@@ -183,8 +183,12 @@ class AdvancedIndexer {
         for (int64_t i = 0; i < num_indices_; ++i) {
             int64_t index = *(reinterpret_cast<int64_t*>(
                     indexer_.GetInputPtr(i + 1, workload_idx)));
+#if defined(__CUDACC__)
+            assert(index >= -indexed_shape_[i] && index < indexed_shape_[i] && "Index out of bounds.");
+#else
             OPEN3D_ASSERT(index >= -indexed_shape_[i] &&
                           index < indexed_shape_[i] && "Index out of bounds.");
+#endif
             index += indexed_shape_[i] * (index < 0);
             offset += index * indexed_strides_[i];
         }
diff --git a/cpp/open3d/core/Indexer.h b/cpp/open3d/core/Indexer.h
index 76f99acba5d..8ca19be0072 100644
--- a/cpp/open3d/core/Indexer.h
+++ b/cpp/open3d/core/Indexer.h
@@ -106,7 +106,7 @@ struct TensorRef {
     // The default copy constructor works on __device__ as well so we don't
     // define it explicitly. shape_[MAX_DIMS] and strides[MAX_DIMS] will be
     // copied fully.
-    TensorRef() : data_ptr_(nullptr), ndims_(0), dtype_byte_size_(0) {}
+    TensorRef() : data_ptr_(nullptr) {}
 
     TensorRef(const Tensor& t) {
         if (t.NumDims() > MAX_DIMS) {
@@ -116,9 +116,17 @@ struct TensorRef {
         data_ptr_ = const_cast<void*>(t.GetDataPtr());
         ndims_ = t.NumDims();
         dtype_byte_size_ = t.GetDtype().ByteSize();
+        total_byte_size_ = 0;
         for (int64_t i = 0; i < ndims_; ++i) {
             shape_[i] = t.GetShape(i);
             byte_strides_[i] = t.GetStride(i) * dtype_byte_size_;
+            // The end of the buffer should be at the end of the largest
+            // strided dimension block. This way, we can compute the total
+            // buffer size in both cases (when the tensor is contiguous and
+            // when it is not). If it is not contiguous, the actual "end" of
+            // the buffer may not simply be NumElements() * dtype_byte_size_.
+            total_byte_size_ =
+                    std::max(total_byte_size_, shape_[i] * byte_strides_[i]);
         }
     }
 
@@ -175,6 +183,7 @@ struct TensorRef {
         rc = rc && (data_ptr_ == other.data_ptr_);
         rc = rc && (ndims_ == other.ndims_);
         rc = rc && (dtype_byte_size_ == other.dtype_byte_size_);
+        rc = rc && (total_byte_size_ == other.total_byte_size_);
         for (int64_t i = 0; i < ndims_; ++i) {
             rc = rc && (shape_[i] == other.shape_[i]);
             rc = rc && (byte_strides_[i] == other.byte_strides_[i]);
@@ -192,6 +201,7 @@ struct TensorRef {
     void* data_ptr_;
     int64_t ndims_ = 0;
     int64_t dtype_byte_size_ = 0;
+    int64_t total_byte_size_ = 0;
     int64_t shape_[MAX_DIMS];
     int64_t byte_strides_[MAX_DIMS];
 };
@@ -242,6 +252,14 @@ class TensorIterator {
                       input_.byte_strides_[i];
             workload_idx = workload_idx % input_.byte_strides_[i];
         }
+
+#if defined(__CUDACC__)
+        assert(offset >= 0 && offset < input_.total_byte_size_);
+#else
+        OPEN3D_ASSERT(offset >= 0 && offset < input_.total_byte_size_ &&
+                      "TensorIterator operation data pointer is out of range.");
+#endif
+
         return static_cast<void*>(static_cast<char*>(input_.data_ptr_) +
                                   offset);
     }
@@ -546,18 +564,25 @@ class Indexer {
         if (workload_idx < 0) {
             return nullptr;
         }
+
+        int64_t offset = 0;
         if (tr_contiguous) {
-            return static_cast<char*>(tr.data_ptr_) +
-                   workload_idx * tr.dtype_byte_size_;
+            offset = workload_idx * tr.dtype_byte_size_;
         } else {
-            int64_t offset = 0;
             for (int64_t i = 0; i < ndims_; ++i) {
                 offset += workload_idx / primary_strides_[i] *
                           tr.byte_strides_[i];
                 workload_idx = workload_idx % primary_strides_[i];
             }
-            return static_cast<char*>(tr.data_ptr_) + offset;
         }
+
+#if defined(__CUDACC__)
+        assert(offset >= 0 && offset < tr.total_byte_size_);
+#else
+        OPEN3D_ASSERT(offset >= 0 && offset < tr.total_byte_size_ &&
+                      "Index operation data pointer is out of range.");
+#endif
+        return static_cast<char*>(tr.data_ptr_) + offset;
     }
 
     /// Get data pointer from a TensorRef with \p workload_idx.
@@ -570,23 +595,11 @@ class Indexer {
     OPEN3D_HOST_DEVICE T* GetWorkloadDataPtr(const TensorRef& tr,
                                              bool tr_contiguous,
                                              int64_t workload_idx) const {
-        // For 0-sized input reduction op, the output Tensor
-        // workload_idx == 1 > NumWorkloads() == 0.
-        if (workload_idx < 0) {
-            return nullptr;
-        }
-        if (tr_contiguous) {
-            return static_cast<T*>(tr.data_ptr_) + workload_idx;
-        } else {
-            int64_t offset = 0;
-            for (int64_t i = 0; i < ndims_; ++i) {
-                offset += workload_idx / primary_strides_[i] *
-                          tr.byte_strides_[i];
-                workload_idx = workload_idx % primary_strides_[i];
-            }
-            return static_cast<T*>(static_cast<void*>(
-                    static_cast<char*>(tr.data_ptr_) + offset));
-        }
+        // See the note in the byte-pointer overload above.
+        // If sizeof(T) == tr.dtype_byte_size_, then we can just static_cast
+        // the byte pointer.
+        return static_cast<T*>(static_cast<void*>(
+                GetWorkloadDataPtr(tr, tr_contiguous, workload_idx)));
     }
 
     /// Number of input and output Tensors.
@@ -638,7 +651,7 @@
 class IndexerIterator {
 public:
     struct Iterator {
-        Iterator() {};
+        Iterator(){};
         Iterator(const Indexer& indexer);
         Iterator(Iterator&& other) = default;
diff --git a/cpp/tests/core/TensorCheck.cpp b/cpp/tests/core/TensorCheck.cpp
index 14fb471d874..2f8113b9af5 100644
--- a/cpp/tests/core/TensorCheck.cpp
+++ b/cpp/tests/core/TensorCheck.cpp
@@ -7,6 +7,7 @@
 
 #include "open3d/core/TensorCheck.h"
 
+#include "open3d/core/CUDAUtils.h"
 #include "open3d/utility/Helper.h"
 #include "tests/Tests.h"
 #include "tests/core/CoreTest.h"
@@ -201,5 +202,40 @@ TEST_P(TensorCheckPermuteDevices, AssertTensorShape) {
     }
 }
 
+#if BUILD_CUDA_MODULE
+class TensorCheckPermuteDevicesDeathTest : public TensorCheckPermuteDevices {};
+INSTANTIATE_TEST_SUITE_P(
+        Tensor,
+        TensorCheckPermuteDevicesDeathTest,
+        testing::ValuesIn(TensorCheckPermuteDevicesDeathTest::TestCases()));
+
+TEST_P(TensorCheckPermuteDevicesDeathTest, AssertTensorIndexOps) {
+    GTEST_FLAG_SET(death_test_style, "threadsafe");
+    core::Device device = GetParam();
+    core::Tensor idx = core::Tensor::Init<int64_t>(
+            {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, device);
+    core::Tensor t = core::Tensor::Zeros({10}, core::Float32, device);
+    core::Tensor val =
+            core::Tensor::Ones({idx.GetLength()}, core::Float32, device);
+    if (device.IsCUDA()) {
+        // For CUDA, this is the best we can do.
+        try {
+            t.IndexAdd_(0, idx, val);
+            core::cuda::Synchronize();
+            core::OPEN3D_GET_LAST_CUDA_ERROR("Index operation failed");
+            FAIL() << "Should not reach here.";
+        } catch (std::runtime_error const& err) {
+            EXPECT_TRUE(utility::ContainsString(
+                    err.what(),
+                    "CUDA runtime error: device-side assert triggered"));
+        } catch (...) {
+            FAIL() << "std::runtime_error not thrown.";
+        }
+    } else {
+        EXPECT_DEATH(t.IndexAdd_(0, idx, val),
+                     "Index operation data pointer is out of range.");
+    }
+}
+#endif
 }  // namespace tests
 }  // namespace open3d
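
The `total_byte_size_` bound introduced in `Indexer.h` is the maximum over `shape_[i] * byte_strides_[i]`, not `NumElements() * dtype_byte_size_`; for non-contiguous views the two differ. A minimal standalone C++ sketch of that computation (hypothetical shape and strides for a sliced view, not Open3D code):

```cpp
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
    // Hypothetical non-contiguous view: a 3x4 float32 tensor sliced with
    // step 2 along dim 1 (t[:, ::2]) -> shape {3, 2}, byte strides {16, 8}.
    const int64_t ndims = 2;
    const int64_t shape[2] = {3, 2};
    const int64_t byte_strides[2] = {16, 8};

    // Same reduction as the TensorRef constructor in the diff:
    // total_byte_size_ = max_i(shape_[i] * byte_strides_[i]).
    int64_t total_byte_size = 0;
    for (int64_t i = 0; i < ndims; ++i) {
        total_byte_size =
                std::max(total_byte_size, shape[i] * byte_strides[i]);
    }

    // NumElements() * dtype_byte_size_ would be 6 * 4 = 24 bytes, but valid
    // byte offsets of this view reach 40 (element [2, 1]) inside the original
    // 48-byte buffer -- hence the max over strided dimension blocks.
    std::printf("total_byte_size = %lld\n",
                static_cast<long long>(total_byte_size));  // prints 48

    // Every offset the indexer produces must satisfy the new guard.
    const int64_t offset = 2 * byte_strides[0] + 1 * byte_strides[1];  // 40
    assert(offset >= 0 && offset < total_byte_size);
    return 0;
}
```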
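
On CUDA, the guards compile to the device-side `assert()` in the `#if defined(__CUDACC__)` branches. A failed device assert cannot be caught at the point where it fires; it only surfaces on the host after synchronization, which is why the test above synchronizes and inspects the last CUDA error rather than using `EXPECT_DEATH`. A minimal CUDA sketch of that failure mode (hypothetical kernel and index data, not Open3D code):

```cuda
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cuda_runtime.h>

// Hypothetical kernel mirroring the guard added in AdvancedIndexing.h:
// device builds fall back to the CUDA-native assert().
__global__ void BoundsCheckedRead(const float* data,
                                  const int64_t* indices,
                                  int64_t num_elems) {
    int64_t index = indices[threadIdx.x];
    assert(index >= -num_elems && index < num_elems && "Index out of bounds.");
    index += num_elems * (index < 0);  // wrap negative indices
    printf("data[%lld] = %f\n", static_cast<long long>(index), data[index]);
}

int main() {
    float* d_data = nullptr;
    int64_t* d_index = nullptr;
    cudaMalloc(&d_data, 10 * sizeof(float));
    cudaMalloc(&d_index, sizeof(int64_t));
    const int64_t bad_index = 10;  // one past the end; trips the assert
    cudaMemcpy(d_index, &bad_index, sizeof(int64_t), cudaMemcpyHostToDevice);

    BoundsCheckedRead<<<1, 1>>>(d_data, d_index, 10);

    // The device-side assert is reported only after synchronization, as
    // cudaErrorAssert ("device-side assert triggered") -- the same message
    // the test matches after core::cuda::Synchronize().
    const cudaError_t err = cudaDeviceSynchronize();
    if (err != cudaSuccess) {
        std::printf("CUDA runtime error: %s\n", cudaGetErrorString(err));
    }
    cudaFree(d_index);
    cudaFree(d_data);
    return 0;
}
```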