Skip to content

Commit 9921660

Browse files
authored
Add partitioned_matrix and tdb_partitioned_matrix unit tests & improve error handling (#381)
1 parent 412f953 commit 9921660

File tree

8 files changed

+534
-40
lines changed

8 files changed

+534
-40
lines changed

src/include/detail/linalg/matrix.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -461,8 +461,8 @@ constexpr auto SubMatrix(
461461

462462
// TODO(paris): This only works on col-major matrices, fix for row-major.
463463
template <class Matrix>
464-
void debug_matrix(const Matrix& matrix, const std::string& msg = "") {
465-
auto max_size = 10;
464+
void debug_matrix(
465+
const Matrix& matrix, const std::string& msg = "", size_t max_size = 10) {
466466
auto rowsEnd = std::min(dimensions(matrix), static_cast<size_t>(max_size));
467467
auto colsEnd = std::min(num_vectors(matrix), static_cast<size_t>(max_size));
468468

src/include/detail/linalg/matrix_with_ids.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -215,12 +215,13 @@ using ColMajorMatrixWithIds = MatrixWithIds<T, IdsType, stdx::layout_left, I>;
215215
// TODO(paris): This only works on col-major matrices, fix for row-major.
216216
template <class MatrixWithIds>
217217
void debug_matrix_with_ids(
218-
const MatrixWithIds& matrix, const std::string& msg = "") {
219-
auto max_size = 10;
218+
const MatrixWithIds& matrix,
219+
const std::string& msg = "",
220+
size_t max_size = 10) {
220221
auto rowsEnd = std::min(dimensions(matrix), static_cast<size_t>(max_size));
221222
auto colsEnd = std::min(num_vectors(matrix), static_cast<size_t>(max_size));
222223

223-
debug_matrix(matrix);
224+
debug_matrix(matrix, msg, max_size);
224225

225226
std::cout << "# ids: [";
226227
auto end = std::min(matrix.num_ids(), static_cast<size_t>(max_size));

src/include/detail/linalg/partitioned_matrix.h

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545

4646
#include <cstddef>
4747
#include "detail/linalg/matrix.h"
48+
#include "detail/linalg/vector.h"
4849

4950
/**
5051
* @brief Partitioned matrix class.
@@ -154,6 +155,18 @@ class PartitionedMatrix : public Matrix<T, LayoutPolicy, I> {
154155
, part_index_(num_parts + 1)
155156
, num_vectors_{::num_vectors(training_set)}
156157
, num_parts_{num_parts} {
158+
if (num_vectors_ == 0) {
159+
throw std::invalid_argument("training_set cannot be empty.");
160+
}
161+
if (size(part_labels) != ::num_vectors(training_set)) {
162+
throw std::invalid_argument(
163+
"The number of part_labels must equal the number of vectors in the "
164+
"training_set.");
165+
}
166+
if (num_parts <= 0) {
167+
throw std::invalid_argument("num_parts should be greater than 0.");
168+
}
169+
157170
auto degrees = std::vector<size_t>(num_parts);
158171

159172
for (size_t i = 0; i < ::num_vectors(training_set); ++i) {
@@ -203,10 +216,6 @@ class PartitionedMatrix : public Matrix<T, LayoutPolicy, I> {
203216
return part_index_;
204217
}
205218

206-
auto& indices() {
207-
return part_index_;
208-
}
209-
210219
/**
 * Load hook of the in-memory partitioned matrix.
 *
 * This implementation does no work and unconditionally returns `false`.
 * NOTE(review): presumably derived classes that page data in override this
 * and return `true` after a successful load -- confirm against subclasses.
 *
 * @return Always `false`.
 */
virtual bool load() {
  return false;
}
@@ -244,12 +253,13 @@ using ColMajorPartitionedMatrix = PartitionedMatrix<
244253

245254
template <class PartitionedMatrix>
246255
void debug_partitioned_matrix(
247-
const PartitionedMatrix& matrix, const std::string& msg = "") {
248-
auto max_size = 10;
249-
auto rowsEnd = std::min(dimension(matrix), static_cast<size_t>(max_size));
256+
const PartitionedMatrix& matrix,
257+
const std::string& msg = "",
258+
size_t max_size = 10) {
259+
auto rowsEnd = std::min(dimensions(matrix), static_cast<size_t>(max_size));
250260
auto colsEnd = std::min(num_vectors(matrix), static_cast<size_t>(max_size));
251261

252-
debug_matrix(matrix, msg);
262+
debug_matrix(matrix, msg, max_size);
253263

254264
std::cout << "# ids: [";
255265
auto end = std::min(matrix.ids().size(), static_cast<size_t>(max_size));

src/include/detail/linalg/tdb_partitioned_matrix.h

Lines changed: 8 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,14 @@ class tdbPartitionedMatrix
294294
, squashed_indices_(size(relevant_parts_) + 1)
295295
, first_resident_part_{0}
296296
, last_resident_part_{0} {
297+
if (relevant_parts_.size() >= indices.size()) {
298+
throw std::runtime_error(
299+
"Invalid partitioning, relevant_parts_ size (" +
300+
std::to_string(relevant_parts_.size()) +
301+
") must be less than indices size (" +
302+
std::to_string(indices.size()) + ")");
303+
}
304+
297305
total_num_parts_ = size(relevant_parts_);
298306

299307
scoped_timer _{tdb_func__ + " " + partitioned_vectors_uri_};
@@ -620,27 +628,7 @@ class tdbPartitionedMatrix
620628
num_loads_++;
621629
return true;
622630
}
623-
#if 0
624-
auto& vectors() const {
625-
return *this;
626-
}
627-
628-
index_type num_resident_parts() const {
629-
return last_resident_part_ - first_resident_part_;
630-
}
631631

632-
index_type resident_part_offset() const {
633-
return resident_part_offset_;
634-
}
635-
636-
index_type col_offset() const {
637-
return resident_col_offset_;
638-
}
639-
640-
size_t num_loads() const {
641-
return num_loads_;
642-
}
643-
#endif
644632
/**
645633
* Destructor. Closes arrays if they are open.
646634
*/

src/include/detail/linalg/vector.h

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -161,26 +161,47 @@ class Vector : public std::span<T> {
161161
};
162162

163163
template <feature_vector V>
164-
void debug_vector(const V& v, const std::string& msg = "") {
164+
void debug_vector(
165+
const V& v, const std::string& msg = "", size_t max_size = 10) {
166+
size_t end = std::min(max_size, dimensions(v));
165167
std::cout << msg << ": [";
166-
for (size_t i = 0; i < dimensions(v); ++i) {
167-
std::cout << v[i] << " ";
168+
for (size_t i = 0; i < end; ++i) {
169+
std::cout << v[i];
170+
if (i != end - 1) {
171+
std::cout << ", ";
172+
}
173+
}
174+
if (dimensions(v) > max_size) {
175+
std::cout << "...";
168176
}
169177
std::cout << "]\n";
170178
}
171179

172180
template <std::ranges::forward_range V>
173-
void debug_vector(const V& v, const std::string& msg = "") {
181+
void debug_vector(
182+
const V& v, const std::string& msg = "", size_t max_size = 10) {
183+
size_t end = std::min(max_size, dimensions(v));
174184
std::cout << msg << ": [";
185+
int idx = 0;
175186
for (auto&& i : v) {
176-
std::cout << i << " ";
187+
if (idx++ >= max_size) {
188+
break;
189+
}
190+
std::cout << i;
191+
if (i != end - 1) {
192+
std::cout << ", ";
193+
}
194+
}
195+
if (dimensions(v) > max_size) {
196+
std::cout << "...";
177197
}
178198
std::cout << "]\n";
179199
}
180200

181201
template <feature_vector V>
182-
void debug_matrix(const V& v, const std::string& msg = "") {
183-
debug_vector(v, msg);
202+
void debug_matrix(
203+
const V& v, const std::string& msg = "", size_t max_size = 10) {
204+
debug_vector(v, msg, max_size);
184205
}
185206

186207
#endif // TILEDB_VECTOR_H

src/include/test/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,8 @@ kmeans_add_test(unit_l2_distance)
126126

127127
kmeans_add_test(unit_matrix)
128128

129+
kmeans_add_test(unit_partitioned_matrix)
130+
129131
kmeans_add_test(unit_matrix_with_ids)
130132

131133
kmeans_add_test(unit_mdspan)
@@ -152,6 +154,8 @@ kmeans_add_test(unit_tdb_matrix)
152154

153155
kmeans_add_test(unit_tdb_matrix_with_ids)
154156

157+
kmeans_add_test(unit_tdb_partitioned_matrix)
158+
155159
kmeans_add_test(unit_utils)
156160

157161
kmeans_add_test(unit_vector)
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
/**
2+
* @file unit_partitioned_matrix.cc
3+
*
4+
* @section LICENSE
5+
*
6+
* The MIT License
7+
*
8+
* @copyright Copyright (c) 2024 TileDB, Inc.
9+
*
10+
* Permission is hereby granted, free of charge, to any person obtaining a copy
11+
* of this software and associated documentation files (the "Software"), to deal
12+
* in the Software without restriction, including without limitation the rights
13+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14+
* copies of the Software, and to permit persons to whom the Software is
15+
* furnished to do so, subject to the following conditions:
16+
*
17+
* The above copyright notice and this permission notice shall be included in
18+
* all copies or substantial portions of the Software.
19+
*
20+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26+
* THE SOFTWARE.
27+
*
28+
* @section DESCRIPTION
29+
*
30+
*/
31+
32+
#include <algorithm>
33+
#include <catch2/catch_all.hpp>
34+
#include <vector>
35+
#include "cpos.h"
36+
#include "detail/linalg/partitioned_matrix.h"
37+
#include "mdspan/mdspan.hpp"
38+
39+
// Smoke test: passes whenever the test binary builds and runs.
TEST_CASE("partitioned_matrix: test test", "[partitioned_matrix]") {
  REQUIRE(true);
}
42+
43+
// A matrix constructed from sizes only must report zero vectors and zero
// partitions, expose zeroed ids and indices, and stay unchanged after load().
TEST_CASE("partitioned_matrix: sizes constructor", "[partitioned_matrix]") {
  using feature_type = int;
  using id_type = int;
  using part_index_type = int;

  size_t num_dimensions = 3;
  size_t vector_capacity = 5;
  size_t partition_capacity = 2;

  auto partitioned_matrix =
      ColMajorPartitionedMatrix<feature_type, id_type, part_index_type>(
          num_dimensions, vector_capacity, partition_capacity);

  const std::vector<part_index_type> zero_ids{0, 0, 0, 0, 0};
  const std::vector<part_index_type> zero_indices{0, 0, 0};

  // The same expectations hold before and after load(), so state them once.
  auto check_still_empty = [&] {
    CHECK(partitioned_matrix.num_vectors() == 0);
    CHECK(partitioned_matrix.num_partitions() == 0);
    CHECK(std::equal(
        partitioned_matrix.ids().begin(),
        partitioned_matrix.ids().end(),
        zero_ids.begin()));
    CHECK(std::equal(
        partitioned_matrix.indices().begin(),
        partitioned_matrix.indices().end(),
        zero_indices.begin()));
  };

  check_still_empty();
  CHECK(partitioned_matrix.load() == false);
  check_still_empty();
}
77+
78+
// A matrix constructed from already partitioned vectors, ids and a partition
// index must expose exactly those ids and indices, and stay unchanged after
// load().
TEST_CASE("partitioned_matrix: vectors constructor", "[partitioned_matrix]") {
  using feature_type = float;
  using id_type = float;
  using part_index_type = float;

  auto parts =
      ColMajorMatrix<feature_type>{{1, 1, 1}, {2, 2, 2}, {3, 3, 3}, {4, 4, 4}};
  std::vector<id_type> ids = {1, 2, 3, 4};
  std::vector<part_index_type> part_index = {0, 1, 4};

  auto partitioned_matrix =
      ColMajorPartitionedMatrix<feature_type, id_type, part_index_type>(
          parts, ids, part_index);

  const std::vector<part_index_type> expected_ids{1, 2, 3, 4};
  const std::vector<part_index_type> expected_indices{0, 1, 4};

  // The same expectations hold before and after load(), so state them once.
  auto check_contents = [&] {
    CHECK(partitioned_matrix.num_vectors() == 4);
    CHECK(partitioned_matrix.num_partitions() == 2);
    CHECK(std::equal(
        partitioned_matrix.ids().begin(),
        partitioned_matrix.ids().end(),
        expected_ids.begin()));
    CHECK(std::equal(
        partitioned_matrix.indices().begin(),
        partitioned_matrix.indices().end(),
        expected_indices.begin()));
  };

  check_contents();
  CHECK(partitioned_matrix.load() == false);
  check_contents();
}
115+
116+
// A matrix constructed from a training set plus per-vector partition labels is
// expected to hold the vectors grouped by label: the label-0 vectors
// (original positions 1 and 3) first, then the label-1 vectors (0, 2 and 4).
TEST_CASE("partitioned_matrix: training constructor", "[partitioned_matrix]") {
  using feature_type = uint64_t;
  using id_type = uint64_t;
  using part_index_type = uint64_t;

  auto training_set =
      ColMajorMatrix<feature_type>{{1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}};
  std::vector<id_type> part_labels = {1, 0, 1, 0, 1};
  size_t num_parts = 2;

  auto partitioned_matrix =
      ColMajorPartitionedMatrix<feature_type, id_type, part_index_type>(
          training_set, part_labels, num_parts);

  CHECK(partitioned_matrix.num_vectors() == _cpo::num_vectors(training_set));
  CHECK(partitioned_matrix.num_partitions() == num_parts);

  // Raw storage: every stored vector's components, in partition order.
  const std::vector<feature_type> expected_data{2, 2, 4, 4, 1, 1, 3, 3, 5, 5};
  auto data_begin = partitioned_matrix.data();
  auto data_end = data_begin + partitioned_matrix.num_vectors() *
                                   _cpo::dimensions(partitioned_matrix);
  CHECK(std::equal(data_begin, data_end, expected_data.begin()));

  // ids() holds the original position of each stored vector.
  const std::vector<part_index_type> expected_ids{1, 3, 0, 2, 4};
  CHECK(std::equal(
      partitioned_matrix.ids().begin(),
      partitioned_matrix.ids().end(),
      expected_ids.begin()));

  // indices() holds the partition boundaries: [0, 2) and [2, 5).
  const std::vector<part_index_type> expected_indices{0, 2, 5};
  CHECK(std::equal(
      partitioned_matrix.indices().begin(),
      partitioned_matrix.indices().end(),
      expected_indices.begin()));
}

0 commit comments

Comments
 (0)