Skip to content

Commit 68c0d2b

Browse files
authored
Merge pull request #4 from GraphBLAS/matrix-conversion
Add basic infrastructure for matrix conversions, COO -> CSR
2 parents 0167f9e + 59aef62 commit 68c0d2b

File tree

12 files changed

+351
-66
lines changed

12 files changed

+351
-66
lines changed

examples/bsp-ls.c

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,8 @@ void print_group_info(hid_t g, const char* name) {
3535
assert(format_ != NULL);
3636
char* format_string = cJSON_GetStringValue(format_);
3737

38-
cJSON* nnz_ = cJSON_GetObjectItemCaseSensitive(binsparse, "nnz");
38+
cJSON* nnz_ =
39+
cJSON_GetObjectItemCaseSensitive(binsparse, "number_of_stored_values");
3940
assert(nnz_ != NULL);
4041
size_t nnz = cJSON_GetNumberValue(nnz_);
4142

@@ -59,6 +60,15 @@ void print_group_info(hid_t g, const char* name) {
5960

6061
printf("Group \"%s\": Version %s Binsparse matrix. Format %s, %zu x %zu.\n",
6162
full_group_path, version_string, format_string, nrows, ncols);
63+
64+
cJSON* data_types =
65+
cJSON_GetObjectItemCaseSensitive(binsparse, "data_types");
66+
assert(data_types != NULL);
67+
68+
cJSON* item;
69+
cJSON_ArrayForEach(item, data_types) {
70+
printf(" %s: %s\n", item->string, cJSON_Print(item));
71+
}
6272
}
6373

6474
H5Literate(g, H5_INDEX_NAME, H5_ITER_INC, NULL, visit_group, NULL);

examples/check_equivalence.c

Lines changed: 35 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,8 @@ int check_array_equivalence(bsp_array_t array1, bsp_array_t array2) {
5353
bsp_array_read(array2, i, value2);
5454

5555
if (value1 != value2) {
56-
fprintf(stderr, "Array values are not equal.\n");
56+
fprintf(stderr, "Array values are not equal. (%zu != %zu)\n", value1,
57+
value2);
5758
return 4;
5859
}
5960
} else if (mm_type1 == BSP_MM_REAL) {
@@ -62,7 +63,8 @@ int check_array_equivalence(bsp_array_t array1, bsp_array_t array2) {
6263
bsp_array_read(array2, i, value2);
6364

6465
if (value1 != value2) {
65-
fprintf(stderr, "Array values are not equal.\n");
66+
fprintf(stderr, "Array values are not equal. (%.17lg != %.17lg)\n",
67+
value1, value2);
6668
return 4;
6769
}
6870
} else if (mm_type1 == BSP_MM_COMPLEX) {
@@ -71,7 +73,11 @@ int check_array_equivalence(bsp_array_t array1, bsp_array_t array2) {
7173
bsp_array_read(array2, i, value2);
7274

7375
if (value1 != value2) {
74-
fprintf(stderr, "Array values are not equal.\n");
76+
fprintf(stderr,
77+
"Array values are not equal. (%.17lg + i%.17lg != %.17lg + "
78+
"i%.17lg)\n",
79+
__real__ value1, __imag__ value1, __real__ value2,
80+
__imag__ value2);
7581
return 4;
7682
}
7783
}
@@ -80,39 +86,6 @@ int check_array_equivalence(bsp_array_t array1, bsp_array_t array2) {
8086
return 0;
8187
}
8288

83-
/*
 * Result of splitting a "file[:dataset]" command-line argument.
 *
 * Both strings are heap-allocated by bsp_parse_fdataset_string(); the caller
 * releases them with free(). `dataset` is NULL when the argument contained no
 * ':' separator.
 */
typedef struct {
  char* fname;
  char* dataset;
} bsp_fdataset_info_t;

/*
 * Split `str` at its LAST ':' into a file name and a dataset/group name.
 *
 *   "a.h5:group" -> fname "a.h5", dataset "group"
 *   "a.h5"       -> fname "a.h5", dataset NULL
 *   "a.h5:"      -> fname "a.h5", dataset ""
 *
 * `str` must be a non-NULL, NUL-terminated string; it is not modified.
 *
 * If an allocation fails, both fields of the returned struct are NULL
 * (nothing is leaked), so callers can detect failure by checking `fname`.
 */
bsp_fdataset_info_t bsp_parse_fdataset_string(char* str) {
  bsp_fdataset_info_t info = {NULL, NULL};

  size_t len = strlen(str);

  // strrchr finds the last ':'; all lengths stay in size_t, so very long
  // arguments cannot overflow a signed index.
  const char* colon = strrchr(str, ':');
  size_t fname_len = (colon != NULL) ? (size_t) (colon - str) : len;

  info.fname = (char*) malloc(fname_len + 1);
  if (info.fname == NULL) {
    return info;
  }
  memcpy(info.fname, str, fname_len);
  info.fname[fname_len] = '\0';

  if (colon != NULL) {
    // Everything after the separator, excluding the ':' itself.
    size_t dataset_len = len - fname_len - 1;

    info.dataset = (char*) malloc(dataset_len + 1);
    if (info.dataset == NULL) {
      free(info.fname);
      info.fname = NULL;
      return info;
    }
    // Copies the terminating NUL as well.
    memcpy(info.dataset, colon + 1, dataset_len + 1);
  }

  return info;
}
115-
11689
int main(int argc, char** argv) {
11790
if (argc < 3) {
11891
printf(
@@ -134,6 +107,32 @@ int main(int argc, char** argv) {
134107
bsp_matrix_t matrix1 = bsp_read_matrix(info1.fname, info1.dataset);
135108
bsp_matrix_t matrix2 = bsp_read_matrix(info2.fname, info2.dataset);
136109

110+
bool perform_suitesparse_declamping = true;
111+
if (perform_suitesparse_declamping &&
112+
strcmp(bsp_get_file_extension(file1), ".mtx") == 0) {
113+
bsp_matrix_declamp_values(matrix1);
114+
}
115+
116+
if (perform_suitesparse_declamping &&
117+
strcmp(bsp_get_file_extension(file2), ".mtx") == 0) {
118+
bsp_matrix_declamp_values(matrix2);
119+
}
120+
121+
// If matrices are not the same format, try to convert.
122+
if (matrix1.format != matrix2.format) {
123+
if (matrix1.format != BSP_COOR) {
124+
bsp_matrix_t intermediate = bsp_convert_matrix(matrix1, BSP_COOR);
125+
bsp_destroy_matrix_t(matrix1);
126+
matrix1 = intermediate;
127+
}
128+
129+
if (matrix2.format != BSP_COOR) {
130+
bsp_matrix_t intermediate = bsp_convert_matrix(matrix2, BSP_COOR);
131+
bsp_destroy_matrix_t(matrix2);
132+
matrix2 = intermediate;
133+
}
134+
}
135+
137136
if (matrix1.format != matrix2.format) {
138137
fprintf(stderr, "Formats do not match. (%s != %s)\n",
139138
bsp_get_matrix_format_string(matrix1.format),

examples/mtx2bsp.c

Lines changed: 65 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,31 @@
44
int main(int argc, char** argv) {
55

66
if (argc < 3) {
7-
printf("usage: ./mtx2bsp [inputfile_name.mtx] [outputfile_name.bsp.hdf5] "
8-
"[optional: dataset]\n");
7+
printf("usage: ./mtx2bsp [input.mtx] [output.bsp.h5]:[optional: group] "
8+
"[optional: format]\n");
9+
printf("\n");
10+
printf("Description: Convert a Matrix Market file to a Binsparse HDF5 "
11+
"file.\n");
12+
printf(" Users can optionally provide an HDF5 group to store "
13+
"the\n");
14+
printf(" file in as well as a specific format. The default "
15+
"format\n");
16+
printf(" is row-sorted COO (COOR).\n");
17+
printf("\n");
18+
printf("example: ./mtx2bsp chesapeake.mtx chesapeake.bsp.h5\n");
19+
printf(" - Convert Matrix Market file `chesapeake.mtx` to Binsparse "
20+
"HDF5 file `chesapeake.bsp.h5`.\n");
21+
printf(" - Matrix will be stored in root group.\n");
22+
printf(" - Matrix will be stored in COOR format.\n");
23+
printf("\n");
24+
printf("example: ./mtx2bsp chesapeake.mtx chesapeake.bsp.h5:chesapeake\n");
25+
printf(" - Same as previous example, but matrix will be stored in "
26+
"HDF5 group `chesapeake`.\n");
27+
printf("\n");
28+
printf(
29+
"example: ./mtx2bsp chesapeake.mtx chesapeake.bsp.h5:chesapeake CSR\n");
30+
printf(" - Same as previous example, but matrix will use CSR "
31+
"format.\n");
932
return 1;
1033
}
1134

@@ -15,16 +38,47 @@ int main(int argc, char** argv) {
1538
bool perform_suitesparse_declamping = true;
1639

1740
char* input_fname = argv[1];
18-
char* output_fname = argv[2];
1941

20-
char* group_name = NULL;
42+
bsp_fdataset_info_t info2 = bsp_parse_fdataset_string(argv[2]);
43+
char* output_fname = info2.fname;
44+
char* group_name = info2.dataset;
45+
46+
char* format_name = NULL;
2147

2248
if (argc >= 4) {
23-
group_name = argv[3];
49+
format_name = argv[3];
50+
}
51+
52+
char* input_file_extension = bsp_get_file_extension(input_fname);
53+
char* output_file_extension = bsp_get_file_extension(output_fname);
54+
55+
if (input_file_extension == NULL ||
56+
strcmp(input_file_extension, ".mtx") != 0) {
57+
fprintf(stderr,
58+
"error: input file \"%s\" is not a Matrix Market file. "
59+
"(Its extension is not '.mtx'.)\n",
60+
input_fname);
61+
return 1;
62+
}
63+
64+
if (output_file_extension == NULL ||
65+
(strcmp(output_file_extension, ".h5") != 0 &&
66+
strcmp(output_file_extension, ".hdf5") != 0)) {
67+
fprintf(stderr,
68+
"error: output file \"%s\" is not an HDF5 file. "
69+
"(Its extension is not '.h5' or '.hdf5'.)\n",
70+
output_fname);
71+
return 1;
2472
}
2573

2674
bsp_mm_metadata m = bsp_mmread_metadata(input_fname);
2775

76+
bsp_matrix_format_t format = BSP_COOR;
77+
if (format_name != NULL) {
78+
format = bsp_get_matrix_format(format_name);
79+
assert(format != 0);
80+
}
81+
2882
printf("%lu x %lu matrix with %lu nonzeros.\n", m.nrows, m.ncols, m.nnz);
2983
printf(
3084
"Matrix Market format is \"%s\" with type \"%s\" and structure \"%s\"\n",
@@ -52,6 +106,12 @@ int main(int argc, char** argv) {
52106

53107
matrix = bsp_matrix_minimize_values(matrix);
54108

109+
if (format != BSP_COOR) {
110+
bsp_matrix_t converted_matrix = bsp_convert_matrix(matrix, format);
111+
bsp_destroy_matrix_t(matrix);
112+
matrix = converted_matrix;
113+
}
114+
55115
bsp_print_matrix_info(matrix);
56116

57117
printf(" === Writing to %s... ===\n", output_fname);

include/binsparse/binsparse.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#define BINSPARSE_VERSION "0.1"
44

55
#include <binsparse/array.h>
6+
#include <binsparse/convert_matrix.h>
67
#include <binsparse/detail/detail.h>
78
#include <binsparse/generate.h>
89
#include <binsparse/hdf5_wrapper.h>

include/binsparse/convert_matrix.h

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
#pragma once
2+
3+
#include <assert.h>
4+
#include <binsparse/matrix.h>
5+
6+
bsp_matrix_t bsp_convert_matrix(bsp_matrix_t matrix,
7+
bsp_matrix_format_t format) {
8+
// Throw an error if matrix already in desired format.
9+
if (matrix.format == format) {
10+
assert(false);
11+
}
12+
13+
if (format == BSP_COOR) {
14+
// *Convert to COO* from another format.
15+
if (matrix.format == BSP_CSR) {
16+
// Convert CSR -> COOR
17+
bsp_matrix_t result = bsp_construct_default_matrix_t();
18+
19+
result.format = BSP_COOR;
20+
21+
// Inherit NNZ, nrows, ncols, ISO-ness, and structure directly from
22+
// original matrix.
23+
result.nnz = matrix.nnz;
24+
result.nrows = matrix.nrows;
25+
result.ncols = matrix.ncols;
26+
result.is_iso = matrix.is_iso;
27+
result.structure = matrix.structure;
28+
29+
size_t max_dim =
30+
(matrix.nrows > matrix.ncols) ? matrix.nrows : matrix.ncols;
31+
32+
bsp_type_t index_type = bsp_pick_integer_type(max_dim);
33+
34+
result.values = bsp_copy_construct_array_t(matrix.values);
35+
36+
// There is a corner case with tall and skinny matrices where we need a
37+
// higher width for rowind. In order to keep rowind/colind the same type,
38+
// we might upcast.
39+
40+
if (index_type == matrix.indices_0.type) {
41+
result.indices_1 = bsp_copy_construct_array_t(matrix.indices_0);
42+
} else {
43+
result.indices_1 = bsp_construct_array_t(matrix.nnz, index_type);
44+
for (size_t i = 0; i < matrix.nnz; i++) {
45+
bsp_array_awrite(result.indices_1, i, matrix.indices_0, i);
46+
}
47+
}
48+
49+
result.indices_0 = bsp_construct_array_t(matrix.nnz, index_type);
50+
51+
for (size_t i = 0; i < matrix.nrows; i++) {
52+
size_t row_begin, row_end;
53+
bsp_array_read(matrix.pointers_to_1, i, row_begin);
54+
bsp_array_read(matrix.pointers_to_1, i + 1, row_end);
55+
for (size_t j_ptr = row_begin; j_ptr < row_end; j_ptr++) {
56+
bsp_array_write(result.indices_0, j_ptr, i);
57+
}
58+
}
59+
return result;
60+
} else {
61+
assert(false);
62+
}
63+
} else {
64+
// Convert to any another format.
65+
66+
// Currently only support COOR -> X.
67+
// If matrix is not COOR, convert to COOR.
68+
if (matrix.format != BSP_COOR) {
69+
bsp_matrix_t intermediate = bsp_convert_matrix(matrix, BSP_COOR);
70+
bsp_matrix_t result = bsp_convert_matrix(intermediate, format);
71+
bsp_destroy_matrix_t(intermediate);
72+
return result;
73+
} else {
74+
if (format == BSP_CSR) {
75+
// Convert COOR -> CSR
76+
77+
bsp_matrix_t result = bsp_construct_default_matrix_t();
78+
79+
result.format = BSP_CSR;
80+
81+
result.nrows = matrix.nrows;
82+
result.ncols = matrix.ncols;
83+
result.nnz = matrix.nnz;
84+
result.is_iso = matrix.is_iso;
85+
result.structure = matrix.structure;
86+
87+
// TODO: consider whether to produce files with varying integer types
88+
// for row indices, column indices, and offsets.
89+
90+
size_t max_dim =
91+
(matrix.nrows > matrix.ncols) ? matrix.nrows : matrix.ncols;
92+
93+
size_t max_value =
94+
(max_dim > matrix.values.size) ? max_dim : matrix.values.size;
95+
96+
bsp_type_t value_type = matrix.values.type;
97+
bsp_type_t index_type = bsp_pick_integer_type(max_value);
98+
99+
// Since COOR is sorted by rows and then by columns, values and column
100+
// indices can be copied exactly. Values' type will not change, but
101+
// column indices might, thus the extra branch.
102+
103+
result.values = bsp_copy_construct_array_t(matrix.values);
104+
105+
if (index_type == matrix.indices_1.type) {
106+
result.indices_0 = bsp_copy_construct_array_t(matrix.indices_1);
107+
} else {
108+
result.indices_0 = bsp_construct_array_t(matrix.nnz, index_type);
109+
110+
for (size_t i = 0; i < matrix.nnz; i++) {
111+
bsp_array_awrite(result.indices_0, i, matrix.indices_1, i);
112+
}
113+
}
114+
115+
result.pointers_to_1 =
116+
bsp_construct_array_t(matrix.nrows + 1, index_type);
117+
118+
bsp_array_t rowptr = result.pointers_to_1;
119+
120+
bsp_array_write(rowptr, 0, 0);
121+
122+
size_t r = 0;
123+
size_t c = 0;
124+
for (size_t c = 0; c < matrix.nnz; c++) {
125+
size_t j;
126+
bsp_array_read(matrix.indices_0, c, j);
127+
128+
while (r < j) {
129+
assert(r + 1 <= matrix.nrows);
130+
131+
bsp_array_write(rowptr, r + 1, c);
132+
r++;
133+
}
134+
}
135+
136+
for (; r < matrix.nrows; r++) {
137+
bsp_array_write(rowptr, r + 1, matrix.nnz);
138+
}
139+
140+
return result;
141+
} else {
142+
assert(false);
143+
}
144+
}
145+
}
146+
}

0 commit comments

Comments
 (0)