1515// specific language governing permissions and limitations
1616// under the License.
1717
18- #include " arrow/tensor/converter .h"
18+ #include " arrow/tensor/converter_internal .h"
1919
2020#include < algorithm>
21+ #include < cmath>
2122#include < cstdint>
2223#include < limits>
2324#include < memory>
2930#include " arrow/status.h"
3031#include " arrow/tensor.h"
3132#include " arrow/type.h"
33+ #include " arrow/type_traits.h"
3234#include " arrow/util/checked_cast.h"
35+ #include " arrow/util/logging_internal.h"
3336#include " arrow/util/sort_internal.h"
37+ #include " arrow/visit_type_inline.h"
3438
3539namespace arrow {
3640
@@ -54,6 +58,89 @@ inline void IncrementIndex(std::vector<int64_t>& coord, const std::vector<int64_
5458 }
5559}
5660
61+ template <typename ValueType, typename IndexType>
62+ Status CheckValues (const SparseCSFIndex& sparse_csf_index,
63+ const typename ValueType::c_type* values, const Tensor& tensor,
64+ const int64_t dim, const int64_t dim_offset, const int64_t start,
65+ const int64_t stop) {
66+ using ValueCType = typename ValueType::c_type;
67+ using IndexCType = typename IndexType::c_type;
68+
69+ const auto & indices = sparse_csf_index.indices ();
70+ const auto & indptr = sparse_csf_index.indptr ();
71+ const auto & axis_order = sparse_csf_index.axis_order ();
72+ auto ndim = indices.size ();
73+ auto strides = tensor.strides ();
74+
75+ const auto & cur_indices = indices[dim];
76+ const auto * indices_data = cur_indices->data ()->data_as <IndexCType>() + start;
77+
78+ if (dim == static_cast <int64_t >(ndim) - 1 ) {
79+ for (auto i = start; i < stop; ++i) {
80+ auto index = static_cast <int64_t >(*indices_data);
81+ const int64_t offset = dim_offset + index * strides[axis_order[dim]];
82+
83+ auto sparse_value = values[i];
84+ auto tensor_value =
85+ *reinterpret_cast <const ValueCType*>(tensor.raw_data () + offset);
86+ if (!is_not_zero<ValueType>(sparse_value)) {
87+ return Status::Invalid (" Sparse tensor values must be non-zero" );
88+ } else if (sparse_value != tensor_value) {
89+ if constexpr (is_floating_type<ValueType>::value) {
90+ if (!std::isnan (tensor_value) || !std::isnan (sparse_value)) {
91+ return Status::Invalid (
92+ " Inconsistent values between sparse tensor and dense tensor" );
93+ }
94+ } else {
95+ return Status::Invalid (
96+ " Inconsistent values between sparse tensor and dense tensor" );
97+ }
98+ }
99+ ++indices_data;
100+ }
101+ } else {
102+ const auto & cur_indptr = indptr[dim];
103+ const auto * indptr_data = cur_indptr->data ()->data_as <IndexCType>() + start;
104+
105+ for (int64_t i = start; i < stop; ++i) {
106+ const int64_t index = *indices_data;
107+ int64_t offset = dim_offset + index * strides[axis_order[dim]];
108+ auto next_start = static_cast <int64_t >(*indptr_data);
109+ auto next_stop = static_cast <int64_t >(*(indptr_data + 1 ));
110+
111+ ARROW_RETURN_NOT_OK ((CheckValues<ValueType, IndexType>(
112+ sparse_csf_index, values, tensor, dim + 1 , offset, next_start, next_stop)));
113+
114+ ++indices_data;
115+ ++indptr_data;
116+ }
117+ }
118+ return Status::OK ();
119+ }
120+
121+ template <typename ValueType, typename IndexType>
122+ Status ValidateSparseTensorCSFCreation (const SparseIndex& sparse_index,
123+ const Buffer& values_buffer,
124+ const Tensor& tensor) {
125+ auto sparse_csf_index = checked_cast<const SparseCSFIndex&>(sparse_index);
126+ const auto * values = values_buffer.data_as <typename ValueType::c_type>();
127+ const auto & indices = sparse_csf_index.indices ();
128+
129+ ARROW_ASSIGN_OR_RAISE (auto non_zero_count, tensor.CountNonZero ());
130+ if (indices.back ()->size () != non_zero_count) {
131+ return Status::Invalid (" Mismatch between non-zero count in sparse tensor (" ,
132+ indices.back ()->size (), " ) and dense tensor (" , non_zero_count,
133+ " )" );
134+ } else if (indices.size () != tensor.shape ().size ()) {
135+ return Status::Invalid (" Mismatch between coordinate dimension in sparse tensor (" ,
136+ indices.size (), " ) and tensor shape (" , tensor.shape ().size (),
137+ " )" );
138+ } else {
139+ return CheckValues<ValueType, IndexType>(sparse_csf_index, values, tensor, 0 , 0 , 0 ,
140+ sparse_csf_index.indptr ()[0 ]->size () - 1 );
141+ }
142+ }
143+
57144// ----------------------------------------------------------------------
58145// SparseTensorConverter for SparseCSFIndex
59146
@@ -88,8 +175,10 @@ class SparseCSFTensorConverter {
88175 std::vector<int64_t > coord (ndim, 0 );
89176 std::vector<int64_t > previous_coord (ndim, -1 );
90177
91- std::vector<TypedBufferBuilder<IndexCType>> indptr_buffer_builders (ndim - 1 );
92- std::vector<TypedBufferBuilder<IndexCType>> indices_buffer_builders (ndim);
178+ std::vector<TypedBufferBuilder<IndexCType>> indptr_buffer_builders (
179+ ndim - 1 , TypedBufferBuilder<IndexCType>(pool_));
180+ std::vector<TypedBufferBuilder<IndexCType>> indices_buffer_builders (
181+ ndim, TypedBufferBuilder<IndexCType>(pool_));
93182
94183 auto * values = values_buffer->mutable_data_as <ValueCType>();
95184
@@ -146,6 +235,8 @@ class SparseCSFTensorConverter {
146235 ARROW_ASSIGN_OR_RAISE (
147236 sparse_index, SparseCSFIndex::Make (index_value_type_, indices_shapes, axis_order,
148237 indptr_buffers, indices_buffers));
238+ DCHECK_OK ((ValidateSparseTensorCSFCreation<ValueType, IndexType>(*sparse_index, *data,
239+ tensor_)));
149240 return Status::OK ();
150241 }
151242
@@ -262,7 +353,7 @@ Status MakeSparseCSFTensorFromTensor(const Tensor& tensor,
262353 std::shared_ptr<Buffer>* out_data) {
263354 SparseCSFTensorConverter converter (tensor, index_value_type, pool);
264355 ConverterVisitor visitor{converter};
265- ARROW_RETURN_NOT_OK (VisitValueAndIndexType (tensor.type (), index_value_type, visitor));
356+ ARROW_RETURN_NOT_OK (VisitValueAndIndexType (* tensor.type (), * index_value_type, visitor));
266357 *out_sparse_index = checked_pointer_cast<SparseIndex>(converter.sparse_index );
267358 *out_data = converter.data ;
268359 return Status::OK ();
0 commit comments