1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -66,6 +66,7 @@
- Fix advanced indexing bug with sliced boolean masks on CUDA devices (PR #7340)
- Fix logic for adding -allow-unsupported-compiler to nvcc (PR #7337)
- Fix linker error "library limit of 65535 objects exceeded" with Ninja generator on MSVC (PR #7335)
- Add assertion guard for invalid indexing operations (PR #7360)

## 0.13

4 changes: 4 additions & 0 deletions cpp/open3d/core/AdvancedIndexing.h
@@ -183,8 +183,12 @@ class AdvancedIndexer {
for (int64_t i = 0; i < num_indices_; ++i) {
int64_t index = *(reinterpret_cast<int64_t*>(
indexer_.GetInputPtr(i + 1, workload_idx)));
#if defined(__CUDACC__)
assert(index >= -indexed_shape_[i] && index < indexed_shape_[i] && "Index out of bounds.");
#else
OPEN3D_ASSERT(index >= -indexed_shape_[i] &&
index < indexed_shape_[i] && "Index out of bounds.");
#endif
index += indexed_shape_[i] * (index < 0);
offset += index * indexed_strides_[i];
}
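A minimal standalone sketch (not part of the PR; `NormalizeIndex` is a hypothetical helper) of what the new guard enforces: indices in `[-shape, shape)` are accepted, negative values wrap to the end of the dimension via the branchless `index += shape * (index < 0)`, and anything outside that range trips the assert.

```cpp
#include <cassert>
#include <cstdint>
#include <cstdio>

// Hypothetical helper mirroring the guarded normalization above.
std::int64_t NormalizeIndex(std::int64_t index, std::int64_t dim_size) {
    // Same check as the new assertion: accept only [-dim_size, dim_size).
    assert(index >= -dim_size && index < dim_size && "Index out of bounds.");
    // Branchless wrap of negative indices, as in `index += shape * (index < 0)`.
    index += dim_size * (index < 0);
    return index;
}

int main() {
    std::printf("%lld\n", (long long)NormalizeIndex(3, 10));   // 3
    std::printf("%lld\n", (long long)NormalizeIndex(-1, 10));  // 9
    // NormalizeIndex(10, 10) or NormalizeIndex(-11, 10) would fail the assert.
    return 0;
}
```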
59 changes: 36 additions & 23 deletions cpp/open3d/core/Indexer.h
@@ -106,7 +106,7 @@ struct TensorRef {
// The default copy constructor works on __device__ as well so we don't
// define it explicitly. shape_[MAX_DIMS] and strides[MAX_DIMS] will be
// copied fully.
TensorRef() : data_ptr_(nullptr), ndims_(0), dtype_byte_size_(0) {}
TensorRef() : data_ptr_(nullptr) {}

TensorRef(const Tensor& t) {
if (t.NumDims() > MAX_DIMS) {
@@ -116,9 +116,17 @@
data_ptr_ = const_cast<void*>(t.GetDataPtr());
ndims_ = t.NumDims();
dtype_byte_size_ = t.GetDtype().ByteSize();
total_byte_size_ = 0;
for (int64_t i = 0; i < ndims_; ++i) {
shape_[i] = t.GetShape(i);
byte_strides_[i] = t.GetStride(i) * dtype_byte_size_;
// The end of the buffer should be at the end of the largest strided
// dimension block. This way, we can compute the total buffer size in
// both cases (when the tensor is contiguous and when it is not). If it
// is not contiguous, the actual "end" of the buffer may not simply be
// NumElements() * dtype_byte_size_.
total_byte_size_ =
std::max(total_byte_size_, shape_[i] * byte_strides_[i]);
}
}

@@ -175,6 +183,7 @@ struct TensorRef {
rc = rc && (data_ptr_ == other.data_ptr_);
rc = rc && (ndims_ == other.ndims_);
rc = rc && (dtype_byte_size_ == other.dtype_byte_size_);
rc = rc && (total_byte_size_ == other.total_byte_size_);
for (int64_t i = 0; i < ndims_; ++i) {
rc = rc && (shape_[i] == other.shape_[i]);
rc = rc && (byte_strides_[i] == other.byte_strides_[i]);
@@ -192,6 +201,7 @@ struct TensorRef {
void* data_ptr_;
int64_t ndims_ = 0;
int64_t dtype_byte_size_ = 0;
int64_t total_byte_size_ = 0;
int64_t shape_[MAX_DIMS];
int64_t byte_strides_[MAX_DIMS];
};
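To see why the new `total_byte_size_` is computed as a maximum over `shape_[i] * byte_strides_[i]` rather than `NumElements() * dtype_byte_size_`, here is a standalone sketch (the `TotalByteSize` helper is illustrative, not part of the PR): for a contiguous tensor the two agree, while for a strided slice the parent buffer extends past the number of elements times the element size.

```cpp
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

// Illustrative helper mirroring the loop in TensorRef's constructor.
std::int64_t TotalByteSize(const std::vector<std::int64_t>& shape,
                           const std::vector<std::int64_t>& byte_strides) {
    std::int64_t total = 0;
    for (std::size_t i = 0; i < shape.size(); ++i) {
        // The buffer must reach at least to the end of the largest strided
        // dimension block; this covers both contiguous and strided layouts.
        total = std::max(total, shape[i] * byte_strides[i]);
    }
    return total;
}

int main() {
    // Contiguous float32 tensor of shape {2, 3, 4}: byte strides {48, 16, 4}.
    // Result: 96 == NumElements() * 4.
    std::printf("%lld\n", (long long)TotalByteSize({2, 3, 4}, {48, 16, 4}));
    // Rows 0 and 2 of a {4, 3} float32 tensor: shape {2, 3}, byte strides {24, 4}.
    // Result: 48, even though NumElements() * 4 is only 24.
    std::printf("%lld\n", (long long)TotalByteSize({2, 3}, {24, 4}));
    return 0;
}
```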
@@ -242,6 +252,14 @@ class TensorIterator {
input_.byte_strides_[i];
workload_idx = workload_idx % input_.byte_strides_[i];
}

#if defined(__CUDACC__)
assert(offset >= 0 && offset < input_.total_byte_size_);
#else
OPEN3D_ASSERT(offset >= 0 && offset < input_.total_byte_size_ &&
"TensorIterator operation data pointer is out of range.");
#endif

return static_cast<void*>(static_cast<char*>(input_.data_ptr_) +
offset);
}
@@ -546,18 +564,25 @@ class Indexer {
if (workload_idx < 0) {
return nullptr;
}

int64_t offset = 0;
if (tr_contiguous) {
return static_cast<char*>(tr.data_ptr_) +
workload_idx * tr.dtype_byte_size_;
offset = workload_idx * tr.dtype_byte_size_;
} else {
int64_t offset = 0;
for (int64_t i = 0; i < ndims_; ++i) {
offset += workload_idx / primary_strides_[i] *
tr.byte_strides_[i];
workload_idx = workload_idx % primary_strides_[i];
}
return static_cast<char*>(tr.data_ptr_) + offset;
}

#if defined(__CUDACC__)
assert(offset >= 0 && offset < tr.total_byte_size_);
#else
OPEN3D_ASSERT(offset >= 0 && offset < tr.total_byte_size_ &&
"Index operation data pointer is out of range.");
#endif
return static_cast<char*>(tr.data_ptr_) + offset;
}
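A standalone sketch of the strided branch above (the shape, strides, and workload index are chosen for illustration): the flat workload index is decomposed with the row-major primary strides, converted to a byte offset with the tensor's byte strides, and the result is expected to stay inside `[0, total_byte_size_)`.

```cpp
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
    // A float32 tensor of shape {2, 3}: primary strides (in elements) {3, 1},
    // byte strides {12, 4}, total byte size 24 == max(shape[i] * byte_strides[i]).
    const std::int64_t primary_strides[2] = {3, 1};
    const std::int64_t byte_strides[2] = {12, 4};
    const std::int64_t total_byte_size = 24;

    std::int64_t workload_idx = 4;  // element (1, 1)
    std::int64_t offset = 0;
    for (int i = 0; i < 2; ++i) {
        offset += workload_idx / primary_strides[i] * byte_strides[i];
        workload_idx = workload_idx % primary_strides[i];
    }
    // For any valid workload index, the offset stays within the buffer.
    assert(offset >= 0 && offset < total_byte_size);
    std::printf("offset = %lld bytes\n", (long long)offset);  // 16
    return 0;
}
```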

/// Get data pointer from a TensorRef with \p workload_idx.
@@ -570,23 +595,11 @@
OPEN3D_HOST_DEVICE T* GetWorkloadDataPtr(const TensorRef& tr,
bool tr_contiguous,
int64_t workload_idx) const {
// For 0-sized input reduction op, the output Tensor
// workload_idx == 1 > NumWorkloads() == 0.
if (workload_idx < 0) {
return nullptr;
}
if (tr_contiguous) {
return static_cast<T*>(tr.data_ptr_) + workload_idx;
} else {
int64_t offset = 0;
for (int64_t i = 0; i < ndims_; ++i) {
offset += workload_idx / primary_strides_[i] *
tr.byte_strides_[i];
workload_idx = workload_idx % primary_strides_[i];
}
return static_cast<T*>(static_cast<void*>(
static_cast<char*>(tr.data_ptr_) + offset));
}
// See the note in this function's documentation.
// If sizeof(T) == tr.dtype_byte_size_, then we can just static cast the
// byte pointer.
return static_cast<T*>(static_cast<void*>(
GetWorkloadDataPtr(tr, tr_contiguous, workload_idx)));
}
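The templated overload now reuses the byte-offset overload and simply reinterprets the result. A standalone sketch (buffer and names are illustrative) of why that is sound when `sizeof(T)` matches the dtype byte size:

```cpp
#include <cstdint>
#include <cstdio>

int main() {
    float buffer[6] = {0.f, 1.f, 2.f, 3.f, 4.f, 5.f};
    void* base = buffer;
    const std::int64_t dtype_byte_size = sizeof(float);

    std::int64_t workload_idx = 4;
    // Byte-level arithmetic, as done by the untyped GetWorkloadDataPtr.
    void* byte_ptr = static_cast<char*>(base) + workload_idx * dtype_byte_size;
    // Typed reinterpretation, as done by the templated wrapper.
    float* typed_ptr = static_cast<float*>(byte_ptr);
    std::printf("%g\n", *typed_ptr);  // prints 4
    return 0;
}
```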

/// Number of input and output Tensors.
@@ -638,7 +651,7 @@
class IndexerIterator {
public:
struct Iterator {
Iterator() {};
Iterator(){};
Iterator(const Indexer& indexer);
Iterator(Iterator&& other) = default;

36 changes: 36 additions & 0 deletions cpp/tests/core/TensorCheck.cpp
@@ -7,6 +7,7 @@

#include "open3d/core/TensorCheck.h"

#include "open3d/core/CUDAUtils.h"
#include "open3d/utility/Helper.h"
#include "tests/Tests.h"
#include "tests/core/CoreTest.h"
@@ -201,5 +202,40 @@ TEST_P(TensorCheckPermuteDevices, AssertTensorShape) {
}
}

#if BUILD_CUDA_MODULE
class TensorCheckPermuteDevicesDeathTest : public TensorCheckPermuteDevices {};
INSTANTIATE_TEST_SUITE_P(
Tensor,
TensorCheckPermuteDevicesDeathTest,
testing::ValuesIn(TensorCheckPermuteDevicesDeathTest::TestCases()));

TEST_P(TensorCheckPermuteDevicesDeathTest, AssertTensorIndexOps) {
GTEST_FLAG_SET(death_test_style, "threadsafe");
core::Device device = GetParam();
core::Tensor idx = core::Tensor::Init<int64_t>(
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, device);
core::Tensor t = core::Tensor::Zeros({10}, core::Float32, device);
core::Tensor val =
core::Tensor::Ones({idx.GetLength()}, core::Float32, device);
if (device.IsCUDA()) {
// For CUDA, this is the best we can do: the device-side assert only
// surfaces as a runtime error after synchronization.
try {
t.IndexAdd_(0, idx, val);
core::cuda::Synchronize();
core::OPEN3D_GET_LAST_CUDA_ERROR("Index operation failed");
FAIL() << "Should not reach here.";
} catch (std::runtime_error const& err) {
EXPECT_TRUE(utility::ContainsString(
err.what(),
"CUDA runtime error: device-side assert triggered"));
} catch (...) {
FAIL() << "std::runtime_error not thrown.";
}
} else {
EXPECT_DEATH(t.IndexAdd_(0, idx, val),
"Index operation data pointer is out of range.");
}
}
#endif
} // namespace tests
} // namespace open3d
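The CUDA branch of this test relies on the fact that a device-side `assert` is only observable on the host after synchronization. A standalone sketch of that pattern using the plain CUDA runtime API, compiled with nvcc (not Open3D's helpers; the kernel and names are illustrative):

```cpp
#include <cassert>
#include <cstdio>
#include <cuda_runtime.h>

__global__ void GuardedKernel(const int* idx, int n) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) {
        // Device-side assert, analogous to the guard added in AdvancedIndexing.h.
        assert(idx[i] >= 0 && idx[i] < n && "Index out of bounds.");
    }
}

int main() {
    int host_idx[4] = {0, 1, 2, 99};  // 99 is deliberately out of range.
    int* dev_idx = nullptr;
    cudaMalloc(&dev_idx, sizeof(host_idx));
    cudaMemcpy(dev_idx, host_idx, sizeof(host_idx), cudaMemcpyHostToDevice);

    GuardedKernel<<<1, 4>>>(dev_idx, 4);
    // The kernel launch itself succeeds; the assert surfaces only after a sync.
    cudaError_t err = cudaDeviceSynchronize();
    std::printf("%s\n", cudaGetErrorString(err));  // "device-side assert triggered"

    cudaFree(dev_idx);
    return 0;
}
```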