@@ -1741,117 +1741,140 @@ void MakeCpuTensorCopy(const Tensor& src_tensor, Tensor& dst_tensor) {
 }
 
 #if !defined(DISABLE_SPARSE_TENSORS)
-static Status CopySparseData(size_t n_sparse_elements,
+static Status CopySparseData(const std::string& name,
+                             int64_t nnz_elements,
                              const ONNX_NAMESPACE::TensorProto& indices,
                              const std::filesystem::path& model_path,
-                             gsl::span<const int64_t>
-                                 dims,
-                             std::function<void(size_t from_idx, size_t to_idx)>
-                                 copier) {
+                             gsl::span<const int64_t> dense_dims,
+                             int64_t dense_elements,
+                             std::function<void(size_t from_idx, size_t to_idx)> copier) {
   Status status = Status::OK();
   TensorShape indices_shape(indices.dims().data(), indices.dims().size());
-  const auto elements = narrow<size_t>(indices_shape.Size());
+  const int64_t indices_elements = indices_shape.Size();
 
-  std::vector<int64_t> indices_values;  // used for conversion of smaller size indices
+  InlinedVector<int64_t> indices_values;  // used for conversion of smaller size indices
   std::vector<uint8_t> unpack_buffer;
   gsl::span<const int64_t> indices_data;
-  const bool has_raw_data = indices.has_raw_data();
+  const bool needs_unpack = utils::HasRawData(indices) || utils::HasExternalData(indices);
   switch (indices.data_type()) {
     case ONNX_NAMESPACE::TensorProto_DataType_INT64:
-      if (has_raw_data) {
-        ORT_RETURN_IF_NOT(indices.raw_data().size() == (elements * sizeof(int64_t)),
-                          "Sparse Indices raw data size does not match expected.");
+      if (needs_unpack) {
+        ORT_RETURN_IF_NOT(indices.raw_data().size() == (narrow<size_t>(indices_elements) * sizeof(int64_t)),
+                          "Sparse tensor: ", name, " indices raw data size does not match expected: ",
+                          indices_elements * sizeof(int64_t));
         ORT_RETURN_IF_ERROR(UnpackInitializerData(indices, model_path, unpack_buffer));
         indices_data = ReinterpretAsSpan<const int64_t>(gsl::make_span(unpack_buffer));
       } else {
-        ORT_RETURN_IF_NOT(indices.int64_data_size() == static_cast<int64_t>(elements),
-                          "Sparse indices int64 data size does not match expected");
-        indices_data = gsl::make_span(indices.int64_data().data(), elements);
+        ORT_RETURN_IF_NOT(indices.int64_data_size() == indices_elements,
+                          "Sparse tensor: ", name, " indices int64 data size does not match expected: ",
+                          indices_elements);
+        indices_data = gsl::make_span(indices.int64_data().data(), narrow<size_t>(indices_elements));
       }
       break;
     case ONNX_NAMESPACE::TensorProto_DataType_INT32: {
-      if (has_raw_data) {
-        ORT_RETURN_IF_NOT(indices.raw_data().size() == (elements * sizeof(int32_t)),
-                          "Sparse Indices raw data size does not match expected.");
+      if (needs_unpack) {
+        ORT_RETURN_IF_NOT(indices.raw_data().size() == (narrow<size_t>(indices_elements) * sizeof(int32_t)),
+                          "Sparse tensor: ", name, " indices raw data size does not match expected: ",
+                          indices_elements * sizeof(int32_t));
         ORT_RETURN_IF_ERROR(UnpackInitializerData(indices, model_path, unpack_buffer));
         auto int32_span = ReinterpretAsSpan<const int32_t>(gsl::make_span(unpack_buffer));
         indices_values.insert(indices_values.cend(), int32_span.begin(), int32_span.end());
         unpack_buffer.clear();
         unpack_buffer.shrink_to_fit();
       } else {
-        ORT_RETURN_IF_NOT(indices.int32_data_size() == static_cast<int64_t>(elements),
-                          "Sparse indices int32 data size does not match expected");
+        ORT_RETURN_IF_NOT(indices.int32_data_size() == indices_elements,
+                          "Sparse tensor: ", name, " indices int32 data size does not match expected: ",
+                          indices_elements);
         indices_values.insert(indices_values.cend(), indices.int32_data().cbegin(), indices.int32_data().cend());
       }
       indices_data = gsl::make_span(indices_values);
       break;
     }
     case ONNX_NAMESPACE::TensorProto_DataType_INT16: {
-      if (has_raw_data) {
-        ORT_RETURN_IF_NOT(indices.raw_data().size() == (elements * sizeof(int16_t)),
-                          "Sparse Indices raw data size does not match expected.");
+      if (needs_unpack) {
+        ORT_RETURN_IF_NOT(indices.raw_data().size() == (narrow<size_t>(indices_elements) * sizeof(int16_t)),
+                          "Sparse tensor: ", name, " indices raw data size does not match expected: ",
+                          indices_elements * sizeof(int16_t));
         ORT_RETURN_IF_ERROR(UnpackInitializerData(indices, model_path, unpack_buffer));
         auto int16_span = ReinterpretAsSpan<const int16_t>(gsl::make_span(unpack_buffer));
         indices_values.insert(indices_values.cend(), int16_span.begin(), int16_span.end());
-        indices_data = gsl::make_span(indices_values);
         unpack_buffer.clear();
         unpack_buffer.shrink_to_fit();
       } else {
-        return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_GRAPH,
-                               "Invalid SparseTensor indices. INT16 indices must be in the raw data of indices tensor");
+        ORT_RETURN_IF_NOT(indices.int32_data_size() == indices_elements,
+                          "Sparse tensor: ", name, " indices int16 data size does not match expected: ",
+                          indices_elements);
+        indices_values.insert(indices_values.cend(), indices.int32_data().cbegin(), indices.int32_data().cend());
       }
+      indices_data = gsl::make_span(indices_values);
       break;
     }
     case ONNX_NAMESPACE::TensorProto_DataType_INT8: {
-      if (has_raw_data) {
-        ORT_RETURN_IF_NOT(indices.raw_data().size() == elements,
-                          "Sparse Indices raw data size does not match expected.");
+      if (needs_unpack) {
+        ORT_RETURN_IF_NOT(indices.raw_data().size() == narrow<size_t>(indices_elements),
+                          "Sparse tensor: ", name, " indices raw data size does not match expected: ",
+                          indices_elements * sizeof(int8_t));
         ORT_RETURN_IF_ERROR(UnpackInitializerData(indices, model_path, unpack_buffer));
         auto int8_span = ReinterpretAsSpan<const int8_t>(gsl::make_span(unpack_buffer));
         indices_values.insert(indices_values.cend(), int8_span.begin(), int8_span.end());
-        indices_data = gsl::make_span(indices_values);
         unpack_buffer.clear();
         unpack_buffer.shrink_to_fit();
       } else {
-        return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_GRAPH,
-                               "Invalid SparseTensor indices. INT8 indices must be in the raw data of indices tensor");
+        ORT_RETURN_IF_NOT(indices.int32_data_size() == indices_elements,
+                          "Sparse tensor: ", name, " indices int8 data size does not match expected: ",
+                          indices_elements);
+        indices_values.insert(indices_values.cend(), indices.int32_data().cbegin(), indices.int32_data().cend());
      }
+      indices_data = gsl::make_span(indices_values);
       break;
     }
     default:
       return ORT_MAKE_STATUS(
           ONNXRUNTIME, INVALID_GRAPH,
-          "Invalid SparseTensor indices. Should one of the following types: int8, int16, int32 or int64");
+          "Sparse tensor: ", name, " indices. Should be one of the following types: int8, int16, int32 or int64");
   }
 
-  if (indices_shape.NumDimensions() == 1) {
+  const auto indices_rank = indices_shape.NumDimensions();
+  if (indices_rank == 1) {
     // flattened indexes
-    for (size_t i = 0; i < n_sparse_elements; ++i) {
-      copier(i, narrow<size_t>(indices_data[i]));
+    for (size_t i = 0, lim = narrow<size_t>(nnz_elements); i < lim; ++i) {
+      const auto idx = indices_data[i];
+      ORT_RETURN_IF_NOT(idx >= 0 && idx < dense_elements,
+                        "Sparse tensor: ", name, " index is out of bounds. Got:", idx,
+                        " expected to be in [0, ", dense_elements, ")");
+
+      copier(i, narrow<size_t>(idx));
     }
-  } else if (indices_shape.NumDimensions() == 2) {
+  } else if (indices_rank == 2) {
     // entries in format {NNZ, rank}
-    ORT_ENFORCE(indices_shape[1] > 0 && static_cast<size_t>(indices_shape[1]) == dims.size());
-    auto rank = static_cast<size_t>(indices_shape[1]);
+    ORT_ENFORCE(indices_shape[1] > 0 && static_cast<size_t>(indices_shape[1]) == dense_dims.size());
+    const auto rank = static_cast<size_t>(indices_shape[1]);
     auto cur_index = indices_data.begin();
-    std::vector<size_t> multipliers;
+    InlinedVector<size_t> multipliers;
     multipliers.resize(rank);
 
     // calculate the number of elements spanned by the inner dimensions (the row-major stride) for each dimension.
     // e.g. if shape is {2,3,4}, the result should be {3*4, 4, 1}
     multipliers[rank - 1] = 1;
     for (auto r = rank - 1; r > 0; --r) {
-      multipliers[r - 1] = SafeInt<size_t>(dims[r]) * multipliers[r];
+      multipliers[r - 1] = SafeInt<size_t>(dense_dims[r]) * multipliers[r];
     }
 
     // calculate the offset for the entry
     // e.g. if shape is {2,3,4} and the entry is (1, 0, 2), the offset is 1*12 + 0*4 + 2*1 = 14,
     // as we skip one full row of 3*4 = 12 entries and then 2 more within the row
-    for (size_t i = 0; i < n_sparse_elements; ++i) {
+    for (size_t i = 0, lim = narrow<size_t>(nnz_elements); i < lim; ++i) {
       SafeInt<int64_t> idx = 0;
       for (size_t j = 0; j < rank; ++j) {
-        idx += SafeInt<int64_t>(cur_index[j]) * multipliers[j];
+        const auto dim_index = cur_index[j];
+        ORT_RETURN_IF_NOT(dim_index >= 0 && dim_index < dense_dims[j],
+                          "Sparse tensor: ", name, " index is out of bounds. Got:", dim_index,
+                          " expected to be in [0, ", dense_dims[j], ")");
+        idx += SafeInt<int64_t>(dim_index) * multipliers[j];
       }
+      ORT_RETURN_IF_NOT(idx >= 0 && idx < dense_elements,
+                        "Sparse tensor: ", name, " index is out of bounds. Got:", static_cast<int64_t>(idx),
+                        " expected to be in [0, ", dense_elements, ")");
 
       copier(i, static_cast<size_t>(idx));
       cur_index += rank;
@@ -1860,7 +1883,7 @@ static Status CopySparseData(size_t n_sparse_elements,
     ORT_ENFORCE(cur_index == indices_data.end());
   } else {
     status = ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_GRAPH,
-                             "Invalid SparseTensor indices. Should be rank 0 or 1. Got:", indices_shape);
+                             "Sparse tensor: ", name, " indices shape. Expected to be rank 1 or 2. Got:", indices_shape);
   }
 
   return status;
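
Aside: the rank-2 branch above flattens each {rank}-sized COO index tuple into a linear offset via row-major multipliers. Below is a minimal standalone sketch of that arithmetic, with plain std types standing in for ORT's SafeInt/InlinedVector/narrow and asserts standing in for ORT_RETURN_IF_NOT; the helper names are illustrative, not part of the ORT API.

#include <cassert>
#include <cstdint>
#include <iostream>
#include <vector>

// Row-major multipliers (strides) for a dense shape, e.g. {2,3,4} -> {12, 4, 1}.
std::vector<int64_t> RowMajorMultipliers(const std::vector<int64_t>& dims) {
  std::vector<int64_t> multipliers(dims.size(), 1);
  for (size_t r = dims.size(); r > 1; --r) {
    multipliers[r - 2] = dims[r - 1] * multipliers[r - 1];
  }
  return multipliers;
}

// Flatten one index tuple into a linear offset, with the same per-dimension
// bounds check the patch adds via ORT_RETURN_IF_NOT.
int64_t FlattenIndex(const std::vector<int64_t>& index,
                     const std::vector<int64_t>& dims,
                     const std::vector<int64_t>& multipliers) {
  int64_t offset = 0;
  for (size_t j = 0; j < dims.size(); ++j) {
    assert(index[j] >= 0 && index[j] < dims[j]);  // reject out-of-bounds entries
    offset += index[j] * multipliers[j];
  }
  return offset;
}

int main() {
  const std::vector<int64_t> dims{2, 3, 4};
  const auto multipliers = RowMajorMultipliers(dims);  // {12, 4, 1}
  std::cout << FlattenIndex({1, 0, 2}, dims, multipliers) << "\n";  // prints 14
}

The example reproduces the worked case from the comments: entry (1, 0, 2) in shape {2,3,4} lands at 1*12 + 0*4 + 2*1 = 14.
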
@@ -1869,53 +1892,110 @@ static Status CopySparseData(size_t n_sparse_elements,
 common::Status SparseTensorProtoToDenseTensorProto(const ONNX_NAMESPACE::SparseTensorProto& sparse,
                                                    const std::filesystem::path& model_path,
                                                    ONNX_NAMESPACE::TensorProto& dense) {
-  Status status = Status::OK();
+  Status status;
 
   const auto& sparse_values = sparse.values();
-  auto type = sparse_values.data_type();
-  dense.set_data_type(type);
-  *dense.mutable_name() = sparse_values.name();
+  const auto& name = sparse_values.name();
 
-  SafeInt<size_t> n_sparse_elements = 1;
-  for (auto dim : sparse_values.dims()) {
-    n_sparse_elements *= dim;
+  const auto values_rank = sparse_values.dims_size();
+  if (values_rank != 1) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_GRAPH,
+                           "Sparse tensor: ", name, " values should be rank 1 for COO format. Got:", values_rank);
   }
 
-  SafeInt<size_t> n_dense_elements = 1;
+  auto type = sparse_values.data_type();
+  dense.set_data_type(type);
+  *dense.mutable_name() = name;
+  SafeInt<int64_t> dense_elements = 1;
+
   for (auto dim : sparse.dims()) {
-    n_dense_elements *= dim;
+    if (dim < 0) {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_GRAPH,
+                             "Sparse tensor: ", name, " dense dims expected to be non-negative. Got:", dim);
+    }
+    dense_elements *= dim;
     dense.add_dims(dim);
   }
 
+  const auto dense_dims = gsl::make_span<const int64_t>(dense.dims().data(), dense.dims().size());
+
+  SafeInt<int64_t> nnz_elements = 1;
+  for (auto dim : sparse_values.dims()) {
+    if (dim < 0) {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_GRAPH,
+                             "Sparse tensor: ", name, " tensor dims expected to be non-negative. Got:", dim);
+    }
+    nnz_elements *= dim;
+  }
+
   const auto& indices = sparse.indices();
-  auto dims = gsl::make_span<const int64_t>(dense.dims().data(), dense.dims().size());
+  const auto indices_rank = indices.dims_size();
+  if (indices_rank != 1 && indices_rank != 2) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_GRAPH,
+                           "Sparse tensor: ", name, " indices should be rank 1 or 2 for supported COO format. Got:", indices_rank);
+  }
 
-  if (type != TensorProto_DataType_STRING) {
-    auto ml_data = DataTypeImpl::TensorTypeFromONNXEnum(type)->GetElementType();
-    size_t element_size = ml_data->Size();
+  const auto indices_dims = gsl::make_span(indices.dims().data(), indices.dims().size());
+
+  if (indices_dims[0] != nnz_elements) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_GRAPH,
+                           "Sparse tensor: ", name,
+                           " indices outer dimension should match the number of non-zero values. Got:",
+                           indices_dims[0], " expected: ", static_cast<int64_t>(nnz_elements));
+  }
 
-    // need to read in sparse data first as it could be in a type specific field, in raw data, or in external data
-    std::vector<uint8_t> sparse_data_storage;
-    ORT_RETURN_IF_ERROR(UnpackInitializerData(sparse_values, model_path, sparse_data_storage));
-    void* sparse_data = sparse_data_storage.data();
+  if (indices_rank == 2 && dense_dims.size() != narrow<size_t>(indices_dims[1])) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_GRAPH,
+                           "Sparse tensor: ", name,
+                           " indices is rank 2, its inner dimension should match the rank of the dense tensor. Got:",
+                           indices_dims[1], " expected: ", dense_dims.size());
+  }
+
+  if (indices_rank == 2) {
+    const auto num_indices = TensorShape(indices_dims).Size();
+    const int64_t expected_indices_entries = SafeInt<int64_t>(nnz_elements) * indices_dims[1];
+    if (num_indices != expected_indices_entries) {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_GRAPH,
+                             "Sparse tensor: ", name,
+                             " indices is rank 2, it should have NNZ values * indices_dims[1] entries. Got:",
+                             num_indices, " expected: ", expected_indices_entries);
+    }
+  }
+
+  if (dense_elements == 0) {
+    // if there are no elements in the dense tensor, we can return early with an empty tensor proto
+    return status;
+  }
+
+  if (type != ONNX_NAMESPACE::TensorProto_DataType_STRING) {
+    auto ml_data = DataTypeImpl::TensorTypeFromONNXEnum(type)->GetElementType();
+    const size_t element_size = ml_data->Size();
 
     // by putting the data into a std::string we can avoid a copy as set_raw_data can do a std::move
     // into the TensorProto.
-    std::string dense_data_storage(n_dense_elements * element_size, 0);
-    if (n_sparse_elements > 0) {
+    std::string dense_data_storage(narrow<size_t>(dense_elements) * element_size, 0);
+    if (nnz_elements > 0) {
+      // need to read in sparse data first as it could be in a type specific field, in raw data, or in external data
+      std::vector<uint8_t> values_data;
+      ORT_RETURN_IF_ERROR(UnpackInitializerData(sparse_values, model_path, values_data));
+      ORT_RETURN_IF_NOT(values_data.size() == static_cast<size_t>(nnz_elements) * element_size,
+                        "Sparse tensor: ", name, " values data size does not match expected: ",
+                        static_cast<size_t>(nnz_elements) * element_size);
+      void* sparse_data = values_data.data();
       void* dense_data = dense_data_storage.data();
 
       switch (element_size) {
         case 1: {
           status = CopySparseData(
-              n_sparse_elements, indices, model_path, dims, [sparse_data, dense_data](size_t from_idx, size_t to_idx) {
+              name, nnz_elements, indices, model_path, dense_dims, dense_elements,
+              [sparse_data, dense_data](size_t from_idx, size_t to_idx) {
                 static_cast<uint8_t*>(dense_data)[to_idx] = static_cast<const uint8_t*>(sparse_data)[from_idx];
               });
 
           break;
         }
         case 2: {
-          status = CopySparseData(n_sparse_elements, indices, model_path, dims,
+          status = CopySparseData(name, nnz_elements, indices, model_path, dense_dims, dense_elements,
                                   [sparse_data, dense_data](size_t from_idx, size_t to_idx) {
                                     const auto* src = static_cast<const uint16_t*>(sparse_data) + from_idx;
                                     auto* dst = static_cast<uint16_t*>(dense_data) + to_idx;
@@ -1925,7 +2005,7 @@ common::Status SparseTensorProtoToDenseTensorProto(const ONNX_NAMESPACE::SparseT
           break;
         }
         case 4: {
-          status = CopySparseData(n_sparse_elements, indices, model_path, dims,
+          status = CopySparseData(name, nnz_elements, indices, model_path, dense_dims, dense_elements,
                                   [sparse_data, dense_data](size_t from_idx, size_t to_idx) {
                                     const auto* src = static_cast<const uint32_t*>(sparse_data) + from_idx;
                                     auto* dst = static_cast<uint32_t*>(dense_data) + to_idx;
@@ -1935,7 +2015,7 @@ common::Status SparseTensorProtoToDenseTensorProto(const ONNX_NAMESPACE::SparseT
           break;
         }
         case 8: {
-          status = CopySparseData(n_sparse_elements, indices, model_path, dims,
+          status = CopySparseData(name, nnz_elements, indices, model_path, dense_dims, dense_elements,
                                   [sparse_data, dense_data](size_t from_idx, size_t to_idx) {
                                     const auto* src = static_cast<const uint64_t*>(sparse_data) + from_idx;
                                     auto* dst = static_cast<uint64_t*>(dense_data) + to_idx;
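
For reference, once indices are flattened the whole conversion for numeric element types reduces to zero-filling a dense buffer and scattering the NNZ values to their offsets. A minimal sketch under the same assumptions as above (plain std types, rank-1 flattened indices; CooToDense is a hypothetical helper, not the ORT entry point, which is SparseTensorProtoToDenseTensorProto):

#include <cassert>
#include <cstdint>
#include <vector>

template <typename T>
std::vector<T> CooToDense(const std::vector<T>& values,         // NNZ values
                          const std::vector<int64_t>& indices,  // rank-1 flattened indices
                          int64_t dense_elements) {
  assert(values.size() == indices.size());  // mirrors the indices_dims[0] == NNZ check
  std::vector<T> dense(static_cast<size_t>(dense_elements), T{});  // zero-initialized
  for (size_t i = 0; i < values.size(); ++i) {
    const int64_t idx = indices[i];
    assert(idx >= 0 && idx < dense_elements);  // mirrors the new bounds check
    dense[static_cast<size_t>(idx)] = values[i];
  }
  return dense;
}

int main() {
  // A 2x3 dense tensor with non-zeros at flattened offsets 1 and 5:
  // {0, 10, 0, 0, 0, 20}
  const auto dense = CooToDense<float>({10.f, 20.f}, {1, 5}, 6);
  return (dense[1] == 10.f && dense[5] == 20.f) ? 0 : 1;
}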