Skip to content

Commit da1306a

Browse files
committed
Non-square matrix CCS assignment bugfix
1 parent 5c00d3c commit da1306a

File tree

2 files changed

+69
-41
lines changed

2 files changed

+69
-41
lines changed

include/graphblas/reference/blas3.hpp

Lines changed: 45 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1062,11 +1062,19 @@ namespace grb {
10621062

10631063
const size_t nzc = capacity( C );
10641064

1065+
#ifdef _H_GRB_REFERENCE_OMP_BLAS3
1066+
#pragma omp parallel for simd
1067+
#endif
1068+
for( size_t i = 0; i <= n_A; i++ ) {
1069+
C_ccs_raw.col_start[ i ] = 0;
1070+
}
1071+
10651072
C_crs_raw.col_start[ 0 ] = 0;
1066-
C_ccs_raw.col_start[ 0 ] = 0;
10671073
// Count, per row i, the matching A/B entries into C.CRS.col_start[ i + 1 ]; the prefix sum itself is applied further below
10681074
#ifdef _H_GRB_REFERENCE_OMP_BLAS3
1069-
#pragma omp parallel for default( none ) shared( B_raw, A_raw, C_crs_raw, std::cout ) firstprivate( m_A )
1075+
#pragma omp parallel for default( none ) \
1076+
shared( B_raw, A_raw, C_crs_raw, C_ccs_raw, std::cout ) \
1077+
firstprivate( m_A )
10701078
#endif
10711079
for( size_t i = 0; i < m_A; i++ ) {
10721080
auto B_k = B_raw.col_start[ i ];
@@ -1086,48 +1094,65 @@ namespace grb {
10861094
}
10871095
if( B_raw.row_index[ B_k ] == j ) {
10881096
cumul += 1;
1097+
C_ccs_raw.col_start[ j + 1 ] += 1;
10891098
}
10901099
}
10911100
C_crs_raw.col_start[ i + 1 ] = cumul;
10921101
}
10931102

10941103
#ifdef _DEBUG
10951104
// Print the CRS prefix sum
1096-
std::cout << "CRS prefix sum: ";
1105+
std::cout << "before nCRS prefix sum: ";
10971106
for( size_t i = 0; i <= m_A; i++ ) {
10981107
std::cout << C_crs_raw.col_start[ i ] << " ";
10991108
}
11001109
std::cout << "\n";
1110+
// Print the CCS prefix sum
1111+
std::cout << "before nCCS prefix sum: ";
1112+
for( size_t i = 0; i <= n_A; i++ ) {
1113+
std::cout << C_ccs_raw.col_start[ i ] << " ";
1114+
}
1115+
std::cout << "\n";
11011116
#endif
11021117

11031118
// Apply the prefix sum
11041119
for( size_t i = 1; i <= m_A; i++ ) {
11051120
C_crs_raw.col_start[ i ] += C_crs_raw.col_start[ i - 1 ];
1106-
C_ccs_raw.col_start[ i ] = C_crs_raw.col_start[ i ];
11071121
}
1122+
for ( size_t i = 1; i <= n_A; i++ ) {
1123+
C_ccs_raw.col_start[ i ] += C_ccs_raw.col_start[ i - 1 ];
1124+
}
1125+
1126+
#ifdef _DEBUG
1127+
// Print the CRS prefix sum
1128+
std::cout << "after nCRS prefix sum: ";
1129+
for( size_t i = 0; i <= m_A; i++ ) {
1130+
std::cout << C_crs_raw.col_start[ i ] << " ";
1131+
}
1132+
std::cout << "\n";
1133+
// Print the CCS prefix sum
1134+
std::cout << "after nCCS prefix sum: ";
1135+
for( size_t i = 0; i <= n_A; i++ ) {
1136+
std::cout << C_ccs_raw.col_start[ i ] << " ";
1137+
}
1138+
std::cout << "\n";
1139+
#endif
11081140

11091141
// Check if the number of nonzeros is greater than the capacity
1110-
if( C_crs_raw.col_start[ m_A ] > nzc || C_ccs_raw.col_start[ m_A ] > nzc ) {
1142+
if( C_crs_raw.col_start[ m_A ] > nzc || C_ccs_raw.col_start[ n_A ] > nzc ) {
11111143
#ifdef _DEBUG
11121144
std::cout << "Insufficient capacity detected for requested operation.\n"
1113-
<< "Requested " << C_crs_raw.col_start[ m_A ] << " nonzeros"
1145+
<< "Requested " << C_ccs_raw.col_start[ m_A ] << " nonzeros"
11141146
<< " but capacity is " << nzc << "\n";
11151147
#endif
11161148
return MISMATCH;
11171149
}
11181150

1119-
#ifdef _H_GRB_REFERENCE_OMP_BLAS3
1120-
#pragma omp parallel for simd
1121-
#endif
1122-
for( size_t i = 0; i < m_A; i++ ) {
1123-
C_crs_raw.row_index[ i ] = C_ccs_raw.row_index[ i ] = 0;
1124-
}
1125-
11261151
RC local_rc = rc;
11271152
#ifdef _H_GRB_REFERENCE_OMP_BLAS3
11281153
#pragma omp parallel default( none ) \
1129-
shared( C_ccs_raw, C_crs_raw, A_raw, B_raw, rc, std::cout ) \
1130-
firstprivate( local_rc, m_A, oper, A_identity, B_identity )
1154+
shared( C_ccs_raw, C_crs_raw, A_raw, B_raw, rc, std::cout ) \
1155+
firstprivate( local_rc, m_A, oper, A_identity, B_identity )
11311156
#endif
11321157
{
11331158
size_t start_row = 0;
@@ -1144,6 +1169,7 @@ namespace grb {
11441169
const auto A_k_end = A_raw.col_start[ i + 1 ];
11451170
for( auto A_k = A_k_start; A_k < A_k_end; ++A_k ) {
11461171
const auto j = A_raw.row_index[ A_k ];
1172+
11471173
while( B_k < B_raw.col_start[ i + 1 ]
11481174
&& B_raw.row_index[ B_k ] > j
11491175
) {
@@ -1165,16 +1191,17 @@ namespace grb {
11651191

11661192
C_crs_raw.row_index[ C_k ] = j;
11671193
C_crs_raw.setValue( C_k, c_val );
1168-
C_ccs_raw.row_index[ C_k ] = i;
1169-
C_ccs_raw.setValue( C_k, c_val );
1194+
1195+
C_ccs_raw.row_index[ C_ccs_raw.col_start[ j ] ] = i;
1196+
C_ccs_raw.setValue( C_ccs_raw.col_start[ j ], c_val );
11701197
#ifdef _DEBUG
11711198
std::cout << "A( " + std::to_string( i ) + ";"
11721199
+ std::to_string( j ) + " ) = "
11731200
+ std::to_string( a_val ) + "\n";
11741201
std::cout << "B( " + std::to_string( i ) + ";"
11751202
+ std::to_string( j ) + " ) = "
11761203
+ std::to_string( b_val ) + "\n";
1177-
std::cout << "C.crs( " + std::to_string( i ) + ";"
1204+
std::cout << "C( " + std::to_string( i ) + ";"
11781205
+ std::to_string( j ) + " ) = "
11791206
+ std::to_string( c_val ) + "\n";
11801207
#endif

tests/unit/eWiseApply_matrix.cpp

Lines changed: 24 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -23,23 +23,24 @@
2323

2424
using namespace grb;
2525

26-
void grb_program( const int &, grb::RC &rc ) {
26+
void grb_program( const size_t &n, grb::RC &rc ) {
2727

2828
// large non-square mixed-domain matrix check
2929
{
30-
grb::Matrix< char > A( 10000000, 2000000 );
31-
grb::Matrix< float > B( 10000000, 2000000 );
32-
grb::Matrix< size_t > C( 10000000, 2000000 );
33-
size_t * I = new size_t[ 2000000 ];
34-
size_t * J = new size_t[ 2000000 ];
35-
char * V = new char[ 2000000 ];
36-
for( size_t k = 0; k < 2000000; ++k ) {
37-
I[ k ] = J[ k ] = k;
30+
grb::Matrix< char > A( n, 2*n );
31+
grb::Matrix< float > B( n, 2*n );
32+
grb::Matrix< size_t > C( n, 2*n );
33+
size_t * I = new size_t[ n ];
34+
size_t * J = new size_t[ n ];
35+
char * V = new char[ n ];
36+
for( size_t k = 0; k < n; ++k ) {
37+
I[ k ] = k;
38+
J[ k ] = k+n;
3839
V[ k ] = 2;
3940
}
40-
rc = grb::buildMatrixUnique( A, I, J, V, 2000000, SEQUENTIAL );
41-
rc = rc ? rc : grb::buildMatrixUnique( B, I, J, V, 2000000, SEQUENTIAL );
42-
rc = rc ? rc : grb::buildMatrixUnique( C, I, J, V, 2000000, SEQUENTIAL );
41+
rc = grb::buildMatrixUnique( A, I, J, V, n, SEQUENTIAL );
42+
rc = rc ? rc : grb::buildMatrixUnique( B, I, J, V, n, SEQUENTIAL );
43+
rc = rc ? rc : grb::buildMatrixUnique( C, I, J, V, n, SEQUENTIAL );
4344
rc = rc ? rc : grb::eWiseApply( C, A, B,
4445
grb::operators::add< float, size_t, char >(), RESIZE );
4546
rc = rc ? rc : grb::eWiseApply( C, A, B,
@@ -49,13 +50,14 @@ void grb_program( const int &, grb::RC &rc ) {
4950
<< "mixed-domain matrix check\n";
5051
return;
5152
}
53+
5254
for( const auto &triple : C ) {
53-
const size_t &i = triple.first.first;
54-
const size_t &j = triple.first.second;
55-
const size_t &v = triple.second;
56-
if( i != j ) {
57-
std::cout << "Unexpected entry at position ( " << i << ", " << j << " ) "
58-
<< "-- only expected entries on the diagonal\n";
55+
const auto &i = triple.first.first;
56+
const auto &j = triple.first.second;
57+
const auto &v = triple.second;
58+
if( j != i+n ) {
59+
std::cout << "Unexpected entry at position ( " << i << ", " << i+n << " ) "
60+
<< "-- only expected entries on the n-th diagonal\n";
5961
rc = FAILED;
6062
}
6163
if( v != 4 ) {
@@ -74,15 +76,14 @@ void grb_program( const int &, grb::RC &rc ) {
7476

7577
int main( int argc, char ** argv ) {
7678
// defaults
77-
bool printUsage = false;
78-
int input = 0; // unused
79+
size_t input = 1000; // unused
7980

8081
// error checking
8182
if( argc > 1 ) {
82-
printUsage = true;
83+
input = std::strtoul( argv[ 1 ], nullptr, 10 );
8384
}
84-
if( printUsage ) {
85-
std::cerr << "Usage: " << argv[ 0 ] << "\n";
85+
if( argc > 2 ) {
86+
std::cerr << "Usage: " << argv[ 0 ] << "[n]\n";
8687
return 1;
8788
}
8889

0 commit comments

Comments
 (0)