1818#include " arrow/tensor/converter_internal.h"
1919
2020#include < algorithm>
21- #include < cmath>
2221#include < cstdint>
2322#include < limits>
2423#include < memory>
3029#include " arrow/status.h"
3130#include " arrow/tensor.h"
3231#include " arrow/type.h"
33- #include " arrow/type_traits.h"
3432#include " arrow/util/checked_cast.h"
3533#include " arrow/util/logging_internal.h"
3634#include " arrow/util/sort_internal.h"
37- #include " arrow/visit_type_inline .h"
35+ #include " arrow/util/sparse_tensor_util .h"
3836
3937namespace arrow {
4038
@@ -58,89 +56,6 @@ inline void IncrementIndex(std::vector<int64_t>& coord, const std::vector<int64_
5856 }
5957}
6058
61- template <typename ValueType, typename IndexType>
62- Status CheckValues (const SparseCSFIndex& sparse_csf_index,
63- const typename ValueType::c_type* values, const Tensor& tensor,
64- const int64_t dim, const int64_t dim_offset, const int64_t start,
65- const int64_t stop) {
66- using ValueCType = typename ValueType::c_type;
67- using IndexCType = typename IndexType::c_type;
68-
69- const auto & indices = sparse_csf_index.indices ();
70- const auto & indptr = sparse_csf_index.indptr ();
71- const auto & axis_order = sparse_csf_index.axis_order ();
72- auto ndim = indices.size ();
73- auto strides = tensor.strides ();
74-
75- const auto & cur_indices = indices[dim];
76- const auto * indices_data = cur_indices->data ()->data_as <IndexCType>() + start;
77-
78- if (dim == static_cast <int64_t >(ndim) - 1 ) {
79- for (auto i = start; i < stop; ++i) {
80- auto index = static_cast <int64_t >(*indices_data);
81- const int64_t offset = dim_offset + index * strides[axis_order[dim]];
82-
83- auto sparse_value = values[i];
84- auto tensor_value =
85- *reinterpret_cast <const ValueCType*>(tensor.raw_data () + offset);
86- if (!is_not_zero<ValueType>(sparse_value)) {
87- return Status::Invalid (" Sparse tensor values must be non-zero" );
88- } else if (sparse_value != tensor_value) {
89- if constexpr (is_floating_type<ValueType>::value) {
90- if (!std::isnan (tensor_value) || !std::isnan (sparse_value)) {
91- return Status::Invalid (
92- " Inconsistent values between sparse tensor and dense tensor" );
93- }
94- } else {
95- return Status::Invalid (
96- " Inconsistent values between sparse tensor and dense tensor" );
97- }
98- }
99- ++indices_data;
100- }
101- } else {
102- const auto & cur_indptr = indptr[dim];
103- const auto * indptr_data = cur_indptr->data ()->data_as <IndexCType>() + start;
104-
105- for (int64_t i = start; i < stop; ++i) {
106- const int64_t index = *indices_data;
107- int64_t offset = dim_offset + index * strides[axis_order[dim]];
108- auto next_start = static_cast <int64_t >(*indptr_data);
109- auto next_stop = static_cast <int64_t >(*(indptr_data + 1 ));
110-
111- ARROW_RETURN_NOT_OK ((CheckValues<ValueType, IndexType>(
112- sparse_csf_index, values, tensor, dim + 1 , offset, next_start, next_stop)));
113-
114- ++indices_data;
115- ++indptr_data;
116- }
117- }
118- return Status::OK ();
119- }
120-
121- template <typename ValueType, typename IndexType>
122- Status ValidateSparseTensorCSFCreation (const SparseIndex& sparse_index,
123- const Buffer& values_buffer,
124- const Tensor& tensor) {
125- auto sparse_csf_index = checked_cast<const SparseCSFIndex&>(sparse_index);
126- const auto * values = values_buffer.data_as <typename ValueType::c_type>();
127- const auto & indices = sparse_csf_index.indices ();
128-
129- ARROW_ASSIGN_OR_RAISE (auto non_zero_count, tensor.CountNonZero ());
130- if (indices.back ()->size () != non_zero_count) {
131- return Status::Invalid (" Mismatch between non-zero count in sparse tensor (" ,
132- indices.back ()->size (), " ) and dense tensor (" , non_zero_count,
133- " )" );
134- } else if (indices.size () != tensor.shape ().size ()) {
135- return Status::Invalid (" Mismatch between coordinate dimension in sparse tensor (" ,
136- indices.size (), " ) and tensor shape (" , tensor.shape ().size (),
137- " )" );
138- } else {
139- return CheckValues<ValueType, IndexType>(sparse_csf_index, values, tensor, 0 , 0 , 0 ,
140- sparse_csf_index.indptr ()[0 ]->size () - 1 );
141- }
142- }
143-
14459// ----------------------------------------------------------------------
14560// SparseTensorConverter for SparseCSFIndex
14661
@@ -151,8 +66,10 @@ class SparseCSFTensorConverter {
15166 MemoryPool* pool)
15267 : tensor_(tensor), index_value_type_(index_value_type), pool_(pool) {}
15368
154- template <typename ValueType, typename IndexType>
155- Status Convert (const ValueType&, const IndexType&) {
69+ // Note: The same type is considered for both indices and indptr during
70+ // tensor-to-CSF-tensor conversion.
71+ template <typename ValueType, typename IndexType, typename IndexPointerType>
72+ Status Convert (const ValueType&, const IndexType&, const IndexPointerType&) {
15673 using ValueCType = typename ValueType::c_type;
15774 using IndexCType = typename IndexType::c_type;
15875 RETURN_NOT_OK (::arrow::internal::CheckSparseIndexMaximumValue (index_value_type_,
@@ -235,8 +152,6 @@ class SparseCSFTensorConverter {
235152 ARROW_ASSIGN_OR_RAISE (
236153 sparse_index, SparseCSFIndex::Make (index_value_type_, indices_shapes, axis_order,
237154 indptr_buffers, indices_buffers));
238- DCHECK_OK ((ValidateSparseTensorCSFCreation<ValueType, IndexType>(*sparse_index, *data,
239- tensor_)));
240155 return Status::OK ();
241156 }
242157
@@ -353,7 +268,8 @@ Status MakeSparseCSFTensorFromTensor(const Tensor& tensor,
353268 std::shared_ptr<Buffer>* out_data) {
354269 SparseCSFTensorConverter converter (tensor, index_value_type, pool);
355270 ConverterVisitor visitor{converter};
356- ARROW_RETURN_NOT_OK (VisitValueAndIndexType (*tensor.type (), *index_value_type, visitor));
271+ ARROW_RETURN_NOT_OK (
272+ util::VisitCSXType (*tensor.type (), *index_value_type, *index_value_type, visitor));
357273 *out_sparse_index = checked_pointer_cast<SparseIndex>(converter.sparse_index );
358274 *out_data = converter.data ;
359275 return Status::OK ();
0 commit comments