diff --git a/CHANGELOG.md b/CHANGELOG.md
index eb748ba4cc5..6a4e8c95ddb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -66,6 +66,7 @@
 - Fix advanced indexing bug with sliced boolean masks on CUDA devices (PR #7340)
 - Fix logic for adding -allow-unsupported-compiler to nvcc (PR #7337)
 - Fix linker error "library limit of 65535 objects exceeded" with Ninja generator on MSVC (PR #7335)
+- Add assertion guard for invalid indexing operations (PR #7360)
 
 ## 0.13
 
diff --git a/cpp/open3d/core/AdvancedIndexing.h b/cpp/open3d/core/AdvancedIndexing.h
index ab69dcac8ea..673a682d7a7 100644
--- a/cpp/open3d/core/AdvancedIndexing.h
+++ b/cpp/open3d/core/AdvancedIndexing.h
@@ -183,8 +183,12 @@ class AdvancedIndexer {
         for (int64_t i = 0; i < num_indices_; ++i) {
             int64_t index = *(reinterpret_cast<int64_t*>(
                     indexer_.GetInputPtr(i + 1, workload_idx)));
+#if defined(__CUDACC__)
+            assert(index >= -indexed_shape_[i] && index < indexed_shape_[i] && "Index out of bounds.");
+#else
             OPEN3D_ASSERT(index >= -indexed_shape_[i] &&
                           index < indexed_shape_[i] && "Index out of bounds.");
+#endif
             index += indexed_shape_[i] * (index < 0);
             offset += index * indexed_strides_[i];
         }
diff --git a/cpp/open3d/core/Indexer.h b/cpp/open3d/core/Indexer.h
index 76f99acba5d..8ca19be0072 100644
--- a/cpp/open3d/core/Indexer.h
+++ b/cpp/open3d/core/Indexer.h
@@ -106,7 +106,7 @@ struct TensorRef {
     // The default copy constructor works on __device__ as well so we don't
     // define it explicitly. shape_[MAX_DIMS] and strides[MAX_DIMS] will be
     // copied fully.
-    TensorRef() : data_ptr_(nullptr), ndims_(0), dtype_byte_size_(0) {}
+    TensorRef() : data_ptr_(nullptr) {}
 
     TensorRef(const Tensor& t) {
         if (t.NumDims() > MAX_DIMS) {
@@ -116,9 +116,17 @@ struct TensorRef {
         data_ptr_ = const_cast<void*>(t.GetDataPtr());
         ndims_ = t.NumDims();
         dtype_byte_size_ = t.GetDtype().ByteSize();
+        total_byte_size_ = 0;
         for (int64_t i = 0; i < ndims_; ++i) {
             shape_[i] = t.GetShape(i);
             byte_strides_[i] = t.GetStride(i) * dtype_byte_size_;
+            // The end of the buffer should be at the end of the largest
+            // strided dimension block. This way, we can compute the total
+            // buffer size in both cases (when the tensor is contiguous and
+            // when it is not). If it is not contiguous, the actual "end" of
+            // the buffer may not simply be NumElements() * dtype_byte_size_.
+            total_byte_size_ =
+                    std::max(total_byte_size_, shape_[i] * byte_strides_[i]);
         }
     }
 
@@ -175,6 +183,7 @@ struct TensorRef {
         rc = rc && (data_ptr_ == other.data_ptr_);
         rc = rc && (ndims_ == other.ndims_);
         rc = rc && (dtype_byte_size_ == other.dtype_byte_size_);
+        rc = rc && (total_byte_size_ == other.total_byte_size_);
         for (int64_t i = 0; i < ndims_; ++i) {
             rc = rc && (shape_[i] == other.shape_[i]);
             rc = rc && (byte_strides_[i] == other.byte_strides_[i]);
@@ -192,6 +201,7 @@ struct TensorRef {
     void* data_ptr_;
     int64_t ndims_ = 0;
     int64_t dtype_byte_size_ = 0;
+    int64_t total_byte_size_ = 0;
     int64_t shape_[MAX_DIMS];
     int64_t byte_strides_[MAX_DIMS];
 };
@@ -242,6 +252,14 @@ class TensorIterator {
                       input_.byte_strides_[i];
             workload_idx = workload_idx % input_.byte_strides_[i];
         }
+
+#if defined(__CUDACC__)
+        assert(offset >= 0 && offset < input_.total_byte_size_);
+#else
+        OPEN3D_ASSERT(offset >= 0 && offset < input_.total_byte_size_ &&
+                      "TensorIterator operation data pointer is out of range.");
+#endif
+
         return static_cast<void*>(static_cast<char*>(input_.data_ptr_) +
                                   offset);
     }
@@ -546,18 +564,25 @@ class Indexer {
         if (workload_idx < 0) {
             return nullptr;
         }
+
+        int64_t offset = 0;
         if (tr_contiguous) {
-            return static_cast<char*>(tr.data_ptr_) +
-                   workload_idx * tr.dtype_byte_size_;
+            offset = workload_idx * tr.dtype_byte_size_;
         } else {
-            int64_t offset = 0;
             for (int64_t i = 0; i < ndims_; ++i) {
                 offset += workload_idx / primary_strides_[i] *
                           tr.byte_strides_[i];
                 workload_idx = workload_idx % primary_strides_[i];
             }
-            return static_cast<char*>(tr.data_ptr_) + offset;
         }
+
+#if defined(__CUDACC__)
+        assert(offset >= 0 && offset < tr.total_byte_size_);
+#else
+        OPEN3D_ASSERT(offset >= 0 && offset < tr.total_byte_size_ &&
+                      "Index operation data pointer is out of range.");
+#endif
+        return static_cast<char*>(tr.data_ptr_) + offset;
     }
 
     /// Get data pointer from a TensorRef with \p workload_idx.
@@ -570,23 +595,11 @@ class Indexer {
     OPEN3D_HOST_DEVICE T* GetWorkloadDataPtr(const TensorRef& tr,
                                              bool tr_contiguous,
                                              int64_t workload_idx) const {
-        // For 0-sized input reduction op, the output Tensor
-        // workload_idx == 1 > NumWorkloads() == 0.
-        if (workload_idx < 0) {
-            return nullptr;
-        }
-        if (tr_contiguous) {
-            return static_cast<T*>(tr.data_ptr_) + workload_idx;
-        } else {
-            int64_t offset = 0;
-            for (int64_t i = 0; i < ndims_; ++i) {
-                offset += workload_idx / primary_strides_[i] *
-                          tr.byte_strides_[i];
-                workload_idx = workload_idx % primary_strides_[i];
-            }
-            return static_cast<T*>(static_cast<void*>(
-                    static_cast<char*>(tr.data_ptr_) + offset));
-        }
+        // See the note in the byte-pointer overload above.
+        // If sizeof(T) == tr.dtype_byte_size_, then we can just static_cast
+        // the byte pointer.
+        return static_cast<T*>(static_cast<void*>(
+                GetWorkloadDataPtr(tr, tr_contiguous, workload_idx)));
     }
 
     /// Number of input and output Tensors.
@@ -638,7 +651,7 @@
 class IndexerIterator {
 public:
     struct Iterator {
-        Iterator() {};
+        Iterator(){};
         Iterator(const Indexer& indexer);
         Iterator(Iterator&& other) = default;
diff --git a/cpp/tests/core/TensorCheck.cpp b/cpp/tests/core/TensorCheck.cpp
index 14fb471d874..2f8113b9af5 100644
--- a/cpp/tests/core/TensorCheck.cpp
+++ b/cpp/tests/core/TensorCheck.cpp
@@ -7,6 +7,7 @@
 
 #include "open3d/core/TensorCheck.h"
 
+#include "open3d/core/CUDAUtils.h"
 #include "open3d/utility/Helper.h"
 #include "tests/Tests.h"
 #include "tests/core/CoreTest.h"
@@ -201,5 +202,40 @@ TEST_P(TensorCheckPermuteDevices, AssertTensorShape) {
     }
 }
 
+#if BUILD_CUDA_MODULE
+class TensorCheckPermuteDevicesDeathTest : public TensorCheckPermuteDevices {};
+INSTANTIATE_TEST_SUITE_P(
+        Tensor,
+        TensorCheckPermuteDevicesDeathTest,
+        testing::ValuesIn(TensorCheckPermuteDevicesDeathTest::TestCases()));
+
+TEST_P(TensorCheckPermuteDevicesDeathTest, AssertTensorIndexOps) {
+    GTEST_FLAG_SET(death_test_style, "threadsafe");
+    core::Device device = GetParam();
+    core::Tensor idx = core::Tensor::Init<int64_t>(
+            {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, device);
+    core::Tensor t = core::Tensor::Zeros({10}, core::Float32, device);
+    core::Tensor val =
+            core::Tensor::Ones({idx.GetLength()}, core::Float32, device);
+    if (device.IsCUDA()) {
+        // For CUDA, this is the best we can do.
+        try {
+            t.IndexAdd_(0, idx, val);
+            core::cuda::Synchronize();
+            core::OPEN3D_GET_LAST_CUDA_ERROR("Index operation failed");
+            FAIL() << "Should not reach here.";
+        } catch (std::runtime_error const& err) {
+            EXPECT_TRUE(utility::ContainsString(
+                    err.what(),
+                    "CUDA runtime error: device-side assert triggered"));
+        } catch (...) {
+            FAIL() << "std::runtime_error not thrown.";
+        }
+    } else {
+        EXPECT_DEATH(t.IndexAdd_(0, idx, val),
+                     "Index operation data pointer is out of range.");
+    }
+}
+#endif
 }  // namespace tests
 }  // namespace open3d
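
The `total_byte_size_` bound introduced in `Indexer.h` is the maximum over `shape_[i] * byte_strides_[i]`, not `NumElements() * dtype_byte_size_`; for non-contiguous views the two differ. A minimal standalone C++ sketch of that computation (hypothetical shape and strides for a sliced view, not Open3D code):

```cpp
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
    // Hypothetical non-contiguous view: a 3x4 float32 tensor sliced with
    // step 2 along dim 1 (t[:, ::2]) -> shape {3, 2}, byte strides {16, 8}.
    const int64_t ndims = 2;
    const int64_t shape[2] = {3, 2};
    const int64_t byte_strides[2] = {16, 8};

    // Same reduction as the TensorRef constructor in the diff:
    // total_byte_size_ = max_i(shape_[i] * byte_strides_[i]).
    int64_t total_byte_size = 0;
    for (int64_t i = 0; i < ndims; ++i) {
        total_byte_size =
                std::max(total_byte_size, shape[i] * byte_strides[i]);
    }

    // NumElements() * dtype_byte_size_ would be 6 * 4 = 24 bytes, but valid
    // byte offsets of this view reach 40 (element [2, 1]) inside the original
    // 48-byte buffer -- hence the max over strided dimension blocks.
    std::printf("total_byte_size = %lld\n",
                static_cast<long long>(total_byte_size));  // prints 48

    // Every offset the indexer produces must satisfy the new guard.
    const int64_t offset = 2 * byte_strides[0] + 1 * byte_strides[1];  // 40
    assert(offset >= 0 && offset < total_byte_size);
    return 0;
}
```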
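
On CUDA, the guards compile to the device-side `assert()` in the `#if defined(__CUDACC__)` branches. A failed device assert cannot be caught at the point where it fires; it only surfaces on the host after synchronization, which is why the test above synchronizes and inspects the last CUDA error rather than using `EXPECT_DEATH`. A minimal CUDA sketch of that failure mode (hypothetical kernel and index data, not Open3D code):

```cuda
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cuda_runtime.h>

// Hypothetical kernel mirroring the guard added in AdvancedIndexing.h:
// device builds fall back to the CUDA-native assert().
__global__ void BoundsCheckedRead(const float* data,
                                  const int64_t* indices,
                                  int64_t num_elems) {
    int64_t index = indices[threadIdx.x];
    assert(index >= -num_elems && index < num_elems && "Index out of bounds.");
    index += num_elems * (index < 0);  // wrap negative indices
    printf("data[%lld] = %f\n", static_cast<long long>(index), data[index]);
}

int main() {
    float* d_data = nullptr;
    int64_t* d_index = nullptr;
    cudaMalloc(&d_data, 10 * sizeof(float));
    cudaMalloc(&d_index, sizeof(int64_t));
    const int64_t bad_index = 10;  // one past the end; trips the assert
    cudaMemcpy(d_index, &bad_index, sizeof(int64_t), cudaMemcpyHostToDevice);

    BoundsCheckedRead<<<1, 1>>>(d_data, d_index, 10);

    // The device-side assert is reported only after synchronization, as
    // cudaErrorAssert ("device-side assert triggered") -- the same message
    // the test matches after core::cuda::Synchronize().
    const cudaError_t err = cudaDeviceSynchronize();
    if (err != cudaSuccess) {
        std::printf("CUDA runtime error: %s\n", cudaGetErrorString(err));
    }
    cudaFree(d_index);
    cudaFree(d_data);
    return 0;
}
```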