apache
diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc
Lines changed: 357 additions & 21 deletions
diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h
Lines changed: 6 additions & 0 deletions
diff --git a/cpp/src/arrow/sparse_tensor_test.cc b/cpp/src/arrow/sparse_tensor_test.cc
Lines changed: 108 additions & 5 deletions
diff --git a/cpp/src/arrow/tensor.cc b/cpp/src/arrow/tensor.cc
Lines changed: 1 addition & 1 deletion
diff --git a/cpp/src/arrow/tensor/converter_internal.h b/cpp/src/arrow/tensor/converter_internal.h
Lines changed: 4 additions & 45 deletions
diff --git a/cpp/src/arrow/tensor/coo_converter.cc b/cpp/src/arrow/tensor/coo_converter.cc
Lines changed: 3 additions & 57 deletions
diff --git a/cpp/src/arrow/tensor/csf_converter.cc b/cpp/src/arrow/tensor/csf_converter.cc
Lines changed: 7 additions & 91 deletions
@@ -508,6 +508,10 @@ class ARROW_EXPORT SparseTensor {
     return ToTensor(default_memory_pool());
   }
 
+  /// \brief Check whether the sparse tensor is valid and is the
+  /// correct compressed form of the given tensor.
+  Status Validate(const Tensor& tensor) const;
+
  protected:
   // Constructor with all attributes
   SparseTensor(const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
@@ -588,6 +592,8 @@ class SparseTensorImpl : public SparseTensor {
     ARROW_RETURN_NOT_OK(internal::MakeSparseTensorFromTensor(
         tensor, SparseIndexType::format_id, index_value_type, pool, &sparse_index,
         &data));
+    // TODO: Check that the newly created sparse tensor is consistent with `tensor`.
+
     return std::make_shared<SparseTensorImpl<SparseIndexType>>(
         internal::checked_pointer_cast<SparseIndexType>(sparse_index), tensor.type(),
         data, tensor.shape(), tensor.dim_names_);
 
@@ -490,7 +490,7 @@ int64_t StridedTensorCountNonZero(int dim_index, int64_t offset, const Tensor& t
   if (dim_index == tensor.ndim() - 1) {
     for (int64_t i = 0; i < tensor.shape()[dim_index]; ++i) {
       const auto* ptr = tensor.raw_data() + offset + i * tensor.strides()[dim_index];
-      auto& elem = *reinterpret_cast<const c_type*>(ptr);
+      auto elem = *reinterpret_cast<const c_type*>(ptr);
       if (internal::is_not_zero<TYPE>(elem)) {
         ++nnz;
       }
 
@@ -24,9 +24,6 @@
 
 namespace arrow {
 
-template <typename VISITOR, typename... ARGS>
-Status VisitTypeInline(const DataType& type, VISITOR* visitor, ARGS&&... args);
-
 namespace internal {
 
 struct SparseTensorConverterMixin {
@@ -71,52 +68,14 @@ Result<std::shared_ptr<Tensor>> MakeTensorFromSparseCSFTensor(
 template <typename Converter>
 struct ConverterVisitor {
   explicit ConverterVisitor(Converter& converter) : converter(converter) {}
-  template <typename ValueType, typename IndexType>
-  Status operator()(const ValueType& value, const IndexType& index_type) {
-    return converter.Convert(value, index_type);
-  }
 
-  Converter& converter;
-};
-
-struct ValueTypeVisitor {
-  template <typename ValueType, typename IndexType, typename Function>
-  enable_if_number<ValueType, Status> Visit(const ValueType& value_type,
-                                            const IndexType& index_type,
-                                            Function&& function) {
-    return function(value_type, index_type);
+  template <typename... Args>
+  Status operator()(Args&&... args) {
+    return converter.Convert(std::forward<Args>(args)...);
   }
 
-  template <typename IndexType, typename Function>
-  Status Visit(const DataType& value_type, const IndexType&, Function&&) {
-    return Status::Invalid("Invalid value type: ", value_type.name(),
-                           ". Expected a number.");
-  }
-};
-
-struct IndexAndValueTypeVisitor {
-  template <typename IndexType, typename Function>
-  enable_if_integer<IndexType, Status> Visit(const IndexType& index_type,
-                                             const DataType& value_type,
-                                             Function&& function) {
-    ValueTypeVisitor visitor;
-    return VisitTypeInline(value_type, &visitor, index_type,
-                           std::forward<Function>(function));
-  }
-
-  template <typename Function>
-  Status Visit(const DataType& type, const DataType&, Function&&) {
-    return Status::Invalid("Invalid index type: ", type.name(), ". Expected integer.");
-  }
+  Converter& converter;
 };
 
-template <typename Function>
-Status VisitValueAndIndexType(const DataType& value_type, const DataType& index_type,
-                              Function&& function) {
-  IndexAndValueTypeVisitor visitor;
-  return VisitTypeInline(index_type, &visitor, value_type,
-                         std::forward<Function>(function));
-}
-
 }  // namespace internal
 }  // namespace arrow
@@ -18,7 +18,6 @@
 #include "arrow/tensor/converter_internal.h"
 
 #include <algorithm>
-#include <cmath>
 #include <cstdint>
 #include <memory>
 #include <numeric>
@@ -28,11 +27,10 @@
 #include "arrow/status.h"
 #include "arrow/tensor.h"
 #include "arrow/type.h"
-#include "arrow/type_traits.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/logging_internal.h"
 #include "arrow/util/macros.h"
-#include "arrow/visit_type_inline.h"
+#include "arrow/util/sparse_tensor_util.h"
 
 namespace arrow {
 
@@ -42,57 +40,6 @@ namespace internal {
 
 namespace {
 
-template <typename ValueType, typename IndexType>
-Status ValidateSparseCooTensorCreation(const SparseCOOIndex& sparse_coo_index,
-                                       const Buffer& sparse_coo_values_buffer,
-                                       const Tensor& tensor) {
-  using IndexCType = typename IndexType::c_type;
-  using ValueCType = typename ValueType::c_type;
-
-  const auto& indices = sparse_coo_index.indices();
-  const auto* indices_data = sparse_coo_index.indices()->data()->data_as<IndexCType>();
-  const auto* sparse_coo_values = sparse_coo_values_buffer.data_as<ValueCType>();
-
-  ARROW_ASSIGN_OR_RAISE(auto non_zero_count, tensor.CountNonZero());
-
-  if (indices->shape()[0] != non_zero_count) {
-    return Status::Invalid("Mismatch between non-zero count in sparse tensor (",
-                           indices->shape()[0], ") and dense tensor (", non_zero_count,
-                           ")");
-  } else if (indices->shape()[1] != static_cast<int64_t>(tensor.shape().size())) {
-    return Status::Invalid("Mismatch between coordinate dimension in sparse tensor (",
-                           indices->shape()[1], ") and tensor shape (",
-                           tensor.shape().size(), ")");
-  }
-
-  auto coord_size = indices->shape()[1];
-  std::vector<int64_t> coord(coord_size);
-  for (int64_t i = 0; i < indices->shape()[0]; i++) {
-    if (!is_not_zero<ValueType>(sparse_coo_values[i])) {
-      return Status::Invalid("Sparse tensor values must be non-zero");
-    }
-
-    for (int64_t j = 0; j < coord_size; j++) {
-      coord[j] = static_cast<int64_t>(indices_data[i * coord_size + j]);
-    }
-
-    if (sparse_coo_values[i] != tensor.Value<ValueType>(coord)) {
-      if constexpr (is_floating_type<ValueType>::value) {
-        if (!std::isnan(tensor.Value<ValueType>(coord)) ||
-            !std::isnan(sparse_coo_values[i])) {
-          return Status::Invalid(
-              "Inconsistent values between sparse tensor and dense tensor");
-        }
-      } else {
-        return Status::Invalid(
-            "Inconsistent values between sparse tensor and dense tensor");
-      }
-    }
-  }
-
-  return Status::OK();
-}
-
 template <typename IndexCType>
 inline void IncrementRowMajorIndex(std::vector<IndexCType>& coord,
                                    const std::vector<int64_t>& shape) {
@@ -265,8 +212,6 @@ class SparseCOOTensorConverter {
                                            indices_shape, indices_strides);
     ARROW_ASSIGN_OR_RAISE(sparse_index, SparseCOOIndex::Make(coords, true));
     data = std::move(values_buffer);
-    DCHECK_OK((ValidateSparseCooTensorCreation<ValueType, IndexType>(*sparse_index, *data,
-                                                                     tensor_)));
     return Status::OK();
   }
 
@@ -328,7 +273,8 @@ Status MakeSparseCOOTensorFromTensor(const Tensor& tensor,
                                      std::shared_ptr<Buffer>* out_data) {
   SparseCOOTensorConverter converter(tensor, index_value_type, pool);
   ConverterVisitor visitor{converter};
-  ARROW_RETURN_NOT_OK(VisitValueAndIndexType(*tensor.type(), *index_value_type, visitor));
+  ARROW_RETURN_NOT_OK(
+      util::VisitCOOTensorType(*tensor.type(), *index_value_type, visitor));
   *out_sparse_index = checked_pointer_cast<SparseIndex>(converter.sparse_index);
   *out_data = converter.data;
   return Status::OK();
 
@@ -18,7 +18,6 @@
 #include "arrow/tensor/converter_internal.h"
 
 #include <algorithm>
-#include <cmath>
 #include <cstdint>
 #include <limits>
 #include <memory>
@@ -30,11 +29,10 @@
 #include "arrow/status.h"
 #include "arrow/tensor.h"
 #include "arrow/type.h"
-#include "arrow/type_traits.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/logging_internal.h"
 #include "arrow/util/sort_internal.h"
-#include "arrow/visit_type_inline.h"
+#include "arrow/util/sparse_tensor_util.h"
 
 namespace arrow {
 
@@ -58,89 +56,6 @@ inline void IncrementIndex(std::vector<int64_t>& coord, const std::vector<int64_
   }
 }
 
-template <typename ValueType, typename IndexType>
-Status CheckValues(const SparseCSFIndex& sparse_csf_index,
-                   const typename ValueType::c_type* values, const Tensor& tensor,
-                   const int64_t dim, const int64_t dim_offset, const int64_t start,
-                   const int64_t stop) {
-  using ValueCType = typename ValueType::c_type;
-  using IndexCType = typename IndexType::c_type;
-
-  const auto& indices = sparse_csf_index.indices();
-  const auto& indptr = sparse_csf_index.indptr();
-  const auto& axis_order = sparse_csf_index.axis_order();
-  auto ndim = indices.size();
-  auto strides = tensor.strides();
-
-  const auto& cur_indices = indices[dim];
-  const auto* indices_data = cur_indices->data()->data_as<IndexCType>() + start;
-
-  if (dim == static_cast<int64_t>(ndim) - 1) {
-    for (auto i = start; i < stop; ++i) {
-      auto index = static_cast<int64_t>(*indices_data);
-      const int64_t offset = dim_offset + index * strides[axis_order[dim]];
-
-      auto sparse_value = values[i];
-      auto tensor_value =
-          *reinterpret_cast<const ValueCType*>(tensor.raw_data() + offset);
-      if (!is_not_zero<ValueType>(sparse_value)) {
-        return Status::Invalid("Sparse tensor values must be non-zero");
-      } else if (sparse_value != tensor_value) {
-        if constexpr (is_floating_type<ValueType>::value) {
-          if (!std::isnan(tensor_value) || !std::isnan(sparse_value)) {
-            return Status::Invalid(
-                "Inconsistent values between sparse tensor and dense tensor");
-          }
-        } else {
-          return Status::Invalid(
-              "Inconsistent values between sparse tensor and dense tensor");
-        }
-      }
-      ++indices_data;
-    }
-  } else {
-    const auto& cur_indptr = indptr[dim];
-    const auto* indptr_data = cur_indptr->data()->data_as<IndexCType>() + start;
-
-    for (int64_t i = start; i < stop; ++i) {
-      const int64_t index = *indices_data;
-      int64_t offset = dim_offset + index * strides[axis_order[dim]];
-      auto next_start = static_cast<int64_t>(*indptr_data);
-      auto next_stop = static_cast<int64_t>(*(indptr_data + 1));
-
-      ARROW_RETURN_NOT_OK((CheckValues<ValueType, IndexType>(
-          sparse_csf_index, values, tensor, dim + 1, offset, next_start, next_stop)));
-
-      ++indices_data;
-      ++indptr_data;
-    }
-  }
-  return Status::OK();
-}
-
-template <typename ValueType, typename IndexType>
-Status ValidateSparseTensorCSFCreation(const SparseIndex& sparse_index,
-                                       const Buffer& values_buffer,
-                                       const Tensor& tensor) {
-  auto sparse_csf_index = checked_cast<const SparseCSFIndex&>(sparse_index);
-  const auto* values = values_buffer.data_as<typename ValueType::c_type>();
-  const auto& indices = sparse_csf_index.indices();
-
-  ARROW_ASSIGN_OR_RAISE(auto non_zero_count, tensor.CountNonZero());
-  if (indices.back()->size() != non_zero_count) {
-    return Status::Invalid("Mismatch between non-zero count in sparse tensor (",
-                           indices.back()->size(), ") and dense tensor (", non_zero_count,
-                           ")");
-  } else if (indices.size() != tensor.shape().size()) {
-    return Status::Invalid("Mismatch between coordinate dimension in sparse tensor (",
-                           indices.size(), ") and tensor shape (", tensor.shape().size(),
-                           ")");
-  } else {
-    return CheckValues<ValueType, IndexType>(sparse_csf_index, values, tensor, 0, 0, 0,
-                                             sparse_csf_index.indptr()[0]->size() - 1);
-  }
-}
-
 // ----------------------------------------------------------------------
 // SparseTensorConverter for SparseCSFIndex
 
@@ -151,8 +66,10 @@ class SparseCSFTensorConverter {
                            MemoryPool* pool)
       : tensor_(tensor), index_value_type_(index_value_type), pool_(pool) {}
 
-  template <typename ValueType, typename IndexType>
-  Status Convert(const ValueType&, const IndexType&) {
+  // Note: Tensor-to-CSF conversion uses the same type for both indices and indptr,
+  // so IndexType and IndexPointerType are always identical here.
+  template <typename ValueType, typename IndexType, typename IndexPointerType>
+  Status Convert(const ValueType&, const IndexType&, const IndexPointerType&) {
     using ValueCType = typename ValueType::c_type;
     using IndexCType = typename IndexType::c_type;
     RETURN_NOT_OK(::arrow::internal::CheckSparseIndexMaximumValue(index_value_type_,
@@ -235,8 +152,6 @@ class SparseCSFTensorConverter {
     ARROW_ASSIGN_OR_RAISE(
         sparse_index, SparseCSFIndex::Make(index_value_type_, indices_shapes, axis_order,
                                            indptr_buffers, indices_buffers));
-    DCHECK_OK((ValidateSparseTensorCSFCreation<ValueType, IndexType>(*sparse_index, *data,
-                                                                     tensor_)));
     return Status::OK();
   }
 
@@ -353,7 +268,8 @@ Status MakeSparseCSFTensorFromTensor(const Tensor& tensor,
                                      std::shared_ptr<Buffer>* out_data) {
   SparseCSFTensorConverter converter(tensor, index_value_type, pool);
   ConverterVisitor visitor{converter};
-  ARROW_RETURN_NOT_OK(VisitValueAndIndexType(*tensor.type(), *index_value_type, visitor));
+  ARROW_RETURN_NOT_OK(
+      util::VisitCSXType(*tensor.type(), *index_value_type, *index_value_type, visitor));
   *out_sparse_index = checked_pointer_cast<SparseIndex>(converter.sparse_index);
   *out_data = converter.data;
   return Status::OK();
Original file line number	Diff line number	Diff line change
`@@ -490,7 +490,7 @@ int64_t StridedTensorCountNonZero(int dim_index, int64_t offset, const Tensor& t`
`490`	`490`	`if (dim_index == tensor.ndim() - 1) {`
`491`	`491`	`for (int64_t i = 0; i < tensor.shape()[dim_index]; ++i) {`
`492`	`492`	`const auto* ptr = tensor.raw_data() + offset + i * tensor.strides()[dim_index];`
`493`		`- auto& elem = *reinterpret_cast<const c_type*>(ptr);`
	`493`	`+ auto elem = *reinterpret_cast<const c_type*>(ptr);`
`494`	`494`	`if (internal::is_not_zero<TYPE>(elem)) {`
`495`	`495`	`++nnz;`
`496`	`496`	`}`