Skip to content

Commit 47b4029

Browse files
authored
Merge pull request #3 from GraphBLAS/optimize-value
Add optimization to minimize the types in the values array
2 parents 80e1229 + 65707f5 commit 47b4029

File tree

5 files changed

+134
-4
lines changed

5 files changed

+134
-4
lines changed

examples/mtx2bsp.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ int main(int argc, char** argv) {
4141
bsp_matrix_t matrix = bsp_mmread(input_fname);
4242
printf(" === Done reading. ===\n");
4343

44+
matrix = bsp_matrix_minimize_values(matrix);
45+
4446
bsp_print_matrix_info(matrix);
4547

4648
printf(" === Writing to %s... ===\n", output_fname);

include/binsparse/binsparse.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,5 +9,6 @@
99
#include <binsparse/matrix_market/matrix_market_inspector.h>
1010
#include <binsparse/matrix_market/matrix_market_read.h>
1111
#include <binsparse/matrix_market/matrix_market_write.h>
12+
#include <binsparse/minimize_values.h>
1213
#include <binsparse/read_matrix.h>
1314
#include <binsparse/write_matrix.h>

include/binsparse/hdf5_wrapper.h

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,22 @@ int bsp_write_array(hid_t f, char* label, bsp_array_t array) {
1616
hid_t fspace = H5Screate_simple(1, (hsize_t[]){array.size}, NULL);
1717
hid_t lcpl = H5Pcreate(H5P_LINK_CREATE);
1818

19-
hid_t dset = H5Dcreate2(f, label, hdf5_standard_type, fspace, lcpl,
20-
H5P_DEFAULT, H5P_DEFAULT);
19+
hid_t dcpl = H5Pcreate(H5P_DATASET_CREATE);
20+
21+
// Choose 1 MiB, the default chunk cache size, as our chunk size.
22+
size_t chunk_size = 1024 * 1024 / bsp_type_size(array.type);
23+
24+
// If the dataset is smaller than the chunk size, cap the chunk size.
25+
if (array.size < chunk_size) {
26+
chunk_size = array.size;
27+
}
28+
29+
H5Pset_chunk(dcpl, 1, (hsize_t[]){chunk_size});
30+
31+
H5Pset_deflate(dcpl, 9);
32+
33+
hid_t dset =
34+
H5Dcreate2(f, label, hdf5_standard_type, fspace, lcpl, dcpl, H5P_DEFAULT);
2135

2236
if (dset == H5I_INVALID_HID) {
2337
return -1;
@@ -34,6 +48,7 @@ int bsp_write_array(hid_t f, char* label, bsp_array_t array) {
3448

3549
H5Sclose(fspace);
3650
H5Pclose(lcpl);
51+
H5Pclose(dcpl);
3752

3853
return 0;
3954
}

include/binsparse/matrix_market/matrix_market_write.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,11 +64,12 @@ void bsp_mmwrite(char* file_path, bsp_matrix_t matrix) {
6464
bsp_array_read(matrix.indices_1, count, j);
6565
fprintf(f, "%zu %zu\n", i + 1, j + 1);
6666
} else if (mm_type == BSP_MM_INTEGER) {
67-
size_t i, j, value;
67+
size_t i, j;
68+
int64_t value;
6869
bsp_array_read(matrix.indices_0, count, i);
6970
bsp_array_read(matrix.indices_1, count, j);
7071
bsp_array_read(matrix.values, count, value);
71-
fprintf(f, "%zu %zu %zu\n", i + 1, j + 1, value);
72+
fprintf(f, "%zu %zu %lld\n", i + 1, j + 1, (long long)value);
7273
} else if (mm_type == BSP_MM_REAL) {
7374
size_t i, j;
7475
double value;
include/binsparse/minimize_values.h

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
#pragma once
2+
3+
#include <binsparse/matrix.h>
4+
#include <stdbool.h>
5+
6+
/**
 * Re-encode a matrix's values array with the narrowest element type that
 * still holds every stored value exactly.
 *
 *  - BSP_FLOAT64         -> BSP_FLOAT32 when every value compares equal to
 *                           itself after a round trip through `float`.
 *  - BSP_INT64           -> the smallest unsigned type (UINT8/16/32/64) when
 *                           no value is negative, otherwise the smallest
 *                           signed type (INT8/16/32) that covers [min, max].
 *  - BSP_COMPLEX_FLOAT64 -> BSP_COMPLEX_FLOAT32 under the same round-trip
 *                           rule as the real case.
 *
 * Matrices whose values have any other type are returned unchanged.
 *
 * When a narrower array is produced, the previous values array is handed to
 * bsp_destroy_array_t() and replaced in the returned matrix, so the caller
 * must continue with the returned matrix rather than the argument.
 *
 * @param matrix  Matrix whose `values` array should be minimized.
 * @return        The same matrix, with `values` possibly replaced.
 */
bsp_matrix_t bsp_matrix_minimize_values(bsp_matrix_t matrix) {
  const size_t count = matrix.values.size;

  if (matrix.values.type == BSP_FLOAT64) {
    const double* values = (const double*)matrix.values.data;

    // A value is float32-representable iff narrowing and widening it again
    // gives back the same double. NaN never compares equal to itself, so an
    // array containing NaN is deliberately left as float64.
    bool float32_representable = true;
    for (size_t i = 0; i < count; i++) {
      if ((double)(float)values[i] != values[i]) {
        float32_representable = false;
        break; // One counter-example is enough; no need to scan the rest.
      }
    }

    if (float32_representable) {
      bsp_array_t new_values = bsp_construct_array_t(count, BSP_FLOAT32);
      float* n_values = (float*)new_values.data;

      for (size_t i = 0; i < count; i++) {
        n_values[i] = (float)values[i];
      }

      bsp_destroy_array_t(matrix.values);
      matrix.values = new_values;
    }
  } else if (matrix.values.type == BSP_INT64) {
    const int64_t* values = (const int64_t*)matrix.values.data;

    // Seed the range with 0 so that an empty array is never dereferenced;
    // an empty array then selects the smallest unsigned type.
    int64_t min_value = 0;
    int64_t max_value = 0;
    if (count > 0) {
      min_value = values[0];
      max_value = values[0];
    }

    for (size_t i = 1; i < count; i++) {
      if (values[i] > max_value) {
        max_value = values[i];
      }
      if (values[i] < min_value) {
        min_value = values[i];
      }
    }

    bsp_type_t value_type;
    if (min_value >= 0) {
      // No negative values => unsigned integers.
      if (max_value <= (int64_t)UINT8_MAX) {
        value_type = BSP_UINT8;
      } else if (max_value <= (int64_t)UINT16_MAX) {
        value_type = BSP_UINT16;
      } else if (max_value <= (int64_t)UINT32_MAX) {
        value_type = BSP_UINT32;
      } else {
        value_type = BSP_UINT64;
      }
    } else {
      // Negative values => signed integers.
      if (max_value <= (int64_t)INT8_MAX && min_value >= (int64_t)INT8_MIN) {
        value_type = BSP_INT8;
      } else if (max_value <= (int64_t)INT16_MAX &&
                 min_value >= (int64_t)INT16_MIN) {
        value_type = BSP_INT16;
      } else if (max_value <= (int64_t)INT32_MAX &&
                 min_value >= (int64_t)INT32_MIN) {
        value_type = BSP_INT32;
      } else {
        value_type = BSP_INT64;
      }
    }

    // If the values genuinely need int64 the array is already minimal, so
    // skip the allocate/copy/destroy cycle entirely.
    if (value_type != BSP_INT64) {
      bsp_array_t new_values = bsp_construct_array_t(count, value_type);

      for (size_t i = 0; i < count; i++) {
        int64_t value;
        bsp_array_read(matrix.values, i, value);
        bsp_array_write(new_values, i, value);
      }

      bsp_destroy_array_t(matrix.values);
      matrix.values = new_values;
    }
  } else if (matrix.values.type == BSP_COMPLEX_FLOAT64) {
    const double _Complex* values = (const double _Complex*)matrix.values.data;

    // Same round-trip test as the real case, applied to the complex value as
    // a whole (both components must survive narrowing).
    bool float32_representable = true;
    for (size_t i = 0; i < count; i++) {
      if (((float _Complex)values[i]) != values[i]) {
        float32_representable = false;
        break;
      }
    }

    if (float32_representable) {
      bsp_array_t new_values =
          bsp_construct_array_t(count, BSP_COMPLEX_FLOAT32);
      float _Complex* n_values = (float _Complex*)new_values.data;

      for (size_t i = 0; i < count; i++) {
        n_values[i] = (float _Complex)values[i];
      }

      bsp_destroy_array_t(matrix.values);
      matrix.values = new_values;
    }
  }

  return matrix;
}

0 commit comments

Comments
 (0)