Skip to content

Commit deb3686

Browse files
committed
apply pitrou suggestion
1 parent fef77e1 commit deb3686

File tree

6 files changed

+249
-19
lines changed

6 files changed

+249
-19
lines changed

cpp/src/arrow/sparse_tensor.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
// under the License.
1717

1818
#include "arrow/sparse_tensor.h"
19-
#include "arrow/tensor/converter.h"
19+
#include "arrow/tensor/converter_internal.h"
2020

2121
#include <algorithm>
2222
#include <functional>

cpp/src/arrow/sparse_tensor_test.cc

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -436,6 +436,7 @@ class TestSparseCOOTensorCreationFromNegativeZero
436436
AssertCOOIndex(si->indices(), 0, {4});
437437
AssertCOOIndex(si->indices(), 1, {9});
438438
ASSERT_OK_AND_ASSIGN(auto new_tensor, sparse_coo_tensor->ToTensor());
439+
ASSERT_OK(new_tensor->Validate());
439440
ASSERT_TRUE(new_tensor->Equals(*dense_tensor));
440441
}
441442

@@ -459,6 +460,7 @@ class TestSparseCOOTensorCreationFromNegativeZero
459460
AssertCOOIndex(si->indices(), 0, {1, 1});
460461
AssertCOOIndex(si->indices(), 1, {3, 0});
461462
ASSERT_OK_AND_ASSIGN(auto new_tensor, sparse_coo_tensor->ToTensor());
463+
ASSERT_OK(new_tensor->Validate());
462464
ASSERT_TRUE(new_tensor->Equals(*dense_tensor));
463465
}
464466

@@ -485,6 +487,7 @@ class TestSparseCOOTensorCreationFromNegativeZero
485487
AssertCOOIndex(si->indices(), 0, {1, 1});
486488
AssertCOOIndex(si->indices(), 1, {3, 0});
487489
ASSERT_OK_AND_ASSIGN(auto new_tensor, sparse_coo_tensor->ToTensor());
490+
ASSERT_OK(new_tensor->Validate());
488491
ASSERT_TRUE(new_tensor->Equals(*dense_tensor));
489492
}
490493

@@ -994,6 +997,7 @@ class TestSparseCSRTensorCreationFromNegativeZero
994997
ASSERT_EQ(indices[0], 1);
995998
ASSERT_EQ(indices[1], 0);
996999
ASSERT_OK_AND_ASSIGN(auto new_tensor, sparse_csr_tensor->ToTensor());
1000+
ASSERT_OK(new_tensor->Validate());
9971001
ASSERT_TRUE(new_tensor->Equals(*dense_tensor));
9981002
}
9991003

@@ -1374,6 +1378,7 @@ class TestSparseCSCTensorCreationFromNegativeZero
13741378
ASSERT_EQ(indices[0], 3);
13751379
ASSERT_EQ(indices[1], 1);
13761380
ASSERT_OK_AND_ASSIGN(auto new_tensor, sparse_csc_tensor->ToTensor());
1381+
ASSERT_OK(new_tensor->Validate());
13771382
ASSERT_TRUE(new_tensor->Equals(*dense_tensor));
13781383
}
13791384

@@ -1696,6 +1701,7 @@ class TestSparseCSFTensorCreationFromNegativeZero
16961701
EXPECT_EQ(column_indices[0], 0);
16971702
EXPECT_EQ(column_indices[1], 1);
16981703
ASSERT_OK_AND_ASSIGN(auto new_tensor, sparse_csf_tensor->ToTensor());
1704+
ASSERT_OK(new_tensor->Validate());
16991705
ASSERT_TRUE(new_tensor->Equals(*dense_tensor));
17001706
}
17011707

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,11 @@
2222
#include <memory>
2323
#include <utility>
2424

25-
#include "arrow/visit_type_inline.h"
26-
2725
namespace arrow {
26+
27+
template <typename VISITOR, typename... ARGS>
28+
Status VisitTypeInline(const DataType& type, VISITOR* visitor, ARGS&&... args);
29+
2830
namespace internal {
2931

3032
struct SparseTensorConverterMixin {
@@ -95,25 +97,24 @@ struct ValueTypeVisitor {
9597
struct IndexAndValueTypeVisitor {
9698
template <typename IndexType, typename Function>
9799
enable_if_integer<IndexType, Status> Visit(const IndexType& index_type,
98-
const std::shared_ptr<DataType>& value_type,
100+
const DataType& value_type,
99101
Function&& function) {
100102
ValueTypeVisitor visitor;
101-
return VisitTypeInline(*value_type, &visitor, index_type,
103+
return VisitTypeInline(value_type, &visitor, index_type,
102104
std::forward<Function>(function));
103105
}
104106

105107
template <typename Function>
106-
Status Visit(const DataType& type, const std::shared_ptr<DataType>&, Function&&) {
108+
Status Visit(const DataType& type, const DataType&, Function&&) {
107109
return Status::Invalid("Invalid index type: ", type.name(), ". Expected integer.");
108110
}
109111
};
110112

111113
template <typename Function>
112-
Status VisitValueAndIndexType(const std::shared_ptr<DataType>& value_type,
113-
const std::shared_ptr<DataType>& index_type,
114+
Status VisitValueAndIndexType(const DataType& value_type, const DataType& index_type,
114115
Function&& function) {
115116
IndexAndValueTypeVisitor visitor;
116-
return VisitTypeInline(*index_type, &visitor, value_type,
117+
return VisitTypeInline(index_type, &visitor, value_type,
117118
std::forward<Function>(function));
118119
}
119120

cpp/src/arrow/tensor/coo_converter.cc

Lines changed: 59 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,10 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
#include "arrow/tensor/converter.h"
18+
#include "arrow/tensor/converter_internal.h"
1919

2020
#include <algorithm>
21+
#include <cmath>
2122
#include <cstdint>
2223
#include <memory>
2324
#include <numeric>
@@ -27,8 +28,11 @@
2728
#include "arrow/status.h"
2829
#include "arrow/tensor.h"
2930
#include "arrow/type.h"
31+
#include "arrow/type_traits.h"
3032
#include "arrow/util/checked_cast.h"
33+
#include "arrow/util/logging_internal.h"
3134
#include "arrow/util/macros.h"
35+
#include "arrow/visit_type_inline.h"
3236

3337
namespace arrow {
3438

@@ -38,6 +42,57 @@ namespace internal {
3842

3943
namespace {
4044

45+
template <typename ValueType, typename IndexType>
46+
Status ValidateSparseCooTensorCreation(const SparseCOOIndex& sparse_coo_index,
47+
const Buffer& sparse_coo_values_buffer,
48+
const Tensor& tensor) {
49+
using IndexCType = typename IndexType::c_type;
50+
using ValueCType = typename ValueType::c_type;
51+
52+
const auto& indices = sparse_coo_index.indices();
53+
const auto* indices_data = sparse_coo_index.indices()->data()->data_as<IndexCType>();
54+
const auto* sparse_coo_values = sparse_coo_values_buffer.data_as<ValueCType>();
55+
56+
ARROW_ASSIGN_OR_RAISE(auto non_zero_count, tensor.CountNonZero());
57+
58+
if (indices->shape()[0] != non_zero_count) {
59+
return Status::Invalid("Mismatch between non-zero count in sparse tensor (",
60+
indices->shape()[0], ") and dense tensor (", non_zero_count,
61+
")");
62+
} else if (indices->shape()[1] != static_cast<int64_t>(tensor.shape().size())) {
63+
return Status::Invalid("Mismatch between coordinate dimension in sparse tensor (",
64+
indices->shape()[1], ") and tensor shape (",
65+
tensor.shape().size(), ")");
66+
}
67+
68+
auto coord_size = indices->shape()[1];
69+
std::vector<int64_t> coord(coord_size);
70+
for (int64_t i = 0; i < indices->shape()[0]; i++) {
71+
if (!is_not_zero<ValueType>(sparse_coo_values[i])) {
72+
return Status::Invalid("Sparse tensor values must be non-zero");
73+
}
74+
75+
for (int64_t j = 0; j < coord_size; j++) {
76+
coord[j] = static_cast<int64_t>(indices_data[i * coord_size + j]);
77+
}
78+
79+
if (sparse_coo_values[i] != tensor.Value<ValueType>(coord)) {
80+
if constexpr (is_floating_type<ValueType>::value) {
81+
if (!std::isnan(tensor.Value<ValueType>(coord)) ||
82+
!std::isnan(sparse_coo_values[i])) {
83+
return Status::Invalid(
84+
"Inconsistent values between sparse tensor and dense tensor");
85+
}
86+
} else {
87+
return Status::Invalid(
88+
"Inconsistent values between sparse tensor and dense tensor");
89+
}
90+
}
91+
}
92+
93+
return Status::OK();
94+
}
95+
4196
template <typename IndexCType>
4297
inline void IncrementRowMajorIndex(std::vector<IndexCType>& coord,
4398
const std::vector<int64_t>& shape) {
@@ -210,7 +265,8 @@ class SparseCOOTensorConverter {
210265
indices_shape, indices_strides);
211266
ARROW_ASSIGN_OR_RAISE(sparse_index, SparseCOOIndex::Make(coords, true));
212267
data = std::move(values_buffer);
213-
268+
DCHECK_OK((ValidateSparseCooTensorCreation<ValueType, IndexType>(*sparse_index, *data,
269+
tensor_)));
214270
return Status::OK();
215271
}
216272

@@ -272,7 +328,7 @@ Status MakeSparseCOOTensorFromTensor(const Tensor& tensor,
272328
std::shared_ptr<Buffer>* out_data) {
273329
SparseCOOTensorConverter converter(tensor, index_value_type, pool);
274330
ConverterVisitor visitor{converter};
275-
ARROW_RETURN_NOT_OK(VisitValueAndIndexType(tensor.type(), index_value_type, visitor));
331+
ARROW_RETURN_NOT_OK(VisitValueAndIndexType(*tensor.type(), *index_value_type, visitor));
276332
*out_sparse_index = checked_pointer_cast<SparseIndex>(converter.sparse_index);
277333
*out_data = converter.data;
278334
return Status::OK();

cpp/src/arrow/tensor/csf_converter.cc

Lines changed: 95 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,10 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
#include "arrow/tensor/converter.h"
18+
#include "arrow/tensor/converter_internal.h"
1919

2020
#include <algorithm>
21+
#include <cmath>
2122
#include <cstdint>
2223
#include <limits>
2324
#include <memory>
@@ -29,8 +30,11 @@
2930
#include "arrow/status.h"
3031
#include "arrow/tensor.h"
3132
#include "arrow/type.h"
33+
#include "arrow/type_traits.h"
3234
#include "arrow/util/checked_cast.h"
35+
#include "arrow/util/logging_internal.h"
3336
#include "arrow/util/sort_internal.h"
37+
#include "arrow/visit_type_inline.h"
3438

3539
namespace arrow {
3640

@@ -54,6 +58,89 @@ inline void IncrementIndex(std::vector<int64_t>& coord, const std::vector<int64_
5458
}
5559
}
5660

61+
template <typename ValueType, typename IndexType>
62+
Status CheckValues(const SparseCSFIndex& sparse_csf_index,
63+
const typename ValueType::c_type* values, const Tensor& tensor,
64+
const int64_t dim, const int64_t dim_offset, const int64_t start,
65+
const int64_t stop) {
66+
using ValueCType = typename ValueType::c_type;
67+
using IndexCType = typename IndexType::c_type;
68+
69+
const auto& indices = sparse_csf_index.indices();
70+
const auto& indptr = sparse_csf_index.indptr();
71+
const auto& axis_order = sparse_csf_index.axis_order();
72+
auto ndim = indices.size();
73+
auto strides = tensor.strides();
74+
75+
const auto& cur_indices = indices[dim];
76+
const auto* indices_data = cur_indices->data()->data_as<IndexCType>() + start;
77+
78+
if (dim == static_cast<int64_t>(ndim) - 1) {
79+
for (auto i = start; i < stop; ++i) {
80+
auto index = static_cast<int64_t>(*indices_data);
81+
const int64_t offset = dim_offset + index * strides[axis_order[dim]];
82+
83+
auto sparse_value = values[i];
84+
auto tensor_value =
85+
*reinterpret_cast<const ValueCType*>(tensor.raw_data() + offset);
86+
if (!is_not_zero<ValueType>(sparse_value)) {
87+
return Status::Invalid("Sparse tensor values must be non-zero");
88+
} else if (sparse_value != tensor_value) {
89+
if constexpr (is_floating_type<ValueType>::value) {
90+
if (!std::isnan(tensor_value) || !std::isnan(sparse_value)) {
91+
return Status::Invalid(
92+
"Inconsistent values between sparse tensor and dense tensor");
93+
}
94+
} else {
95+
return Status::Invalid(
96+
"Inconsistent values between sparse tensor and dense tensor");
97+
}
98+
}
99+
++indices_data;
100+
}
101+
} else {
102+
const auto& cur_indptr = indptr[dim];
103+
const auto* indptr_data = cur_indptr->data()->data_as<IndexCType>() + start;
104+
105+
for (int64_t i = start; i < stop; ++i) {
106+
const int64_t index = *indices_data;
107+
int64_t offset = dim_offset + index * strides[axis_order[dim]];
108+
auto next_start = static_cast<int64_t>(*indptr_data);
109+
auto next_stop = static_cast<int64_t>(*(indptr_data + 1));
110+
111+
ARROW_RETURN_NOT_OK((CheckValues<ValueType, IndexType>(
112+
sparse_csf_index, values, tensor, dim + 1, offset, next_start, next_stop)));
113+
114+
++indices_data;
115+
++indptr_data;
116+
}
117+
}
118+
return Status::OK();
119+
}
120+
121+
template <typename ValueType, typename IndexType>
122+
Status ValidateSparseTensorCSFCreation(const SparseIndex& sparse_index,
123+
const Buffer& values_buffer,
124+
const Tensor& tensor) {
125+
auto sparse_csf_index = checked_cast<const SparseCSFIndex&>(sparse_index);
126+
const auto* values = values_buffer.data_as<typename ValueType::c_type>();
127+
const auto& indices = sparse_csf_index.indices();
128+
129+
ARROW_ASSIGN_OR_RAISE(auto non_zero_count, tensor.CountNonZero());
130+
if (indices.back()->size() != non_zero_count) {
131+
return Status::Invalid("Mismatch between non-zero count in sparse tensor (",
132+
indices.back()->size(), ") and dense tensor (", non_zero_count,
133+
")");
134+
} else if (indices.size() != tensor.shape().size()) {
135+
return Status::Invalid("Mismatch between coordinate dimension in sparse tensor (",
136+
indices.size(), ") and tensor shape (", tensor.shape().size(),
137+
")");
138+
} else {
139+
return CheckValues<ValueType, IndexType>(sparse_csf_index, values, tensor, 0, 0, 0,
140+
sparse_csf_index.indptr()[0]->size() - 1);
141+
}
142+
}
143+
57144
// ----------------------------------------------------------------------
58145
// SparseTensorConverter for SparseCSFIndex
59146

@@ -88,8 +175,10 @@ class SparseCSFTensorConverter {
88175
std::vector<int64_t> coord(ndim, 0);
89176
std::vector<int64_t> previous_coord(ndim, -1);
90177

91-
std::vector<TypedBufferBuilder<IndexCType>> indptr_buffer_builders(ndim - 1);
92-
std::vector<TypedBufferBuilder<IndexCType>> indices_buffer_builders(ndim);
178+
std::vector<TypedBufferBuilder<IndexCType>> indptr_buffer_builders(
179+
ndim - 1, TypedBufferBuilder<IndexCType>(pool_));
180+
std::vector<TypedBufferBuilder<IndexCType>> indices_buffer_builders(
181+
ndim, TypedBufferBuilder<IndexCType>(pool_));
93182

94183
auto* values = values_buffer->mutable_data_as<ValueCType>();
95184

@@ -146,6 +235,8 @@ class SparseCSFTensorConverter {
146235
ARROW_ASSIGN_OR_RAISE(
147236
sparse_index, SparseCSFIndex::Make(index_value_type_, indices_shapes, axis_order,
148237
indptr_buffers, indices_buffers));
238+
DCHECK_OK((ValidateSparseTensorCSFCreation<ValueType, IndexType>(*sparse_index, *data,
239+
tensor_)));
149240
return Status::OK();
150241
}
151242

@@ -262,7 +353,7 @@ Status MakeSparseCSFTensorFromTensor(const Tensor& tensor,
262353
std::shared_ptr<Buffer>* out_data) {
263354
SparseCSFTensorConverter converter(tensor, index_value_type, pool);
264355
ConverterVisitor visitor{converter};
265-
ARROW_RETURN_NOT_OK(VisitValueAndIndexType(tensor.type(), index_value_type, visitor));
356+
ARROW_RETURN_NOT_OK(VisitValueAndIndexType(*tensor.type(), *index_value_type, visitor));
266357
*out_sparse_index = checked_pointer_cast<SparseIndex>(converter.sparse_index);
267358
*out_data = converter.data;
268359
return Status::OK();

0 commit comments

Comments
 (0)