@@ -1062,11 +1062,19 @@ namespace grb {
10621062
10631063 const size_t nzc = capacity ( C );
10641064
1065+ #ifdef _H_GRB_REFERENCE_OMP_BLAS3
1066+ #pragma omp parallel for simd
1067+ #endif
1068+ for ( size_t i = 0 ; i <= n_A; i++ ) {
1069+ C_ccs_raw.col_start [ i ] = 0 ;
1070+ }
1071+
10651072 C_crs_raw.col_start [ 0 ] = 0 ;
1066- C_ccs_raw.col_start [ 0 ] = 0 ;
10671073 // Prefix sum computation into L.CRS.col_start
10681074#ifdef _H_GRB_REFERENCE_OMP_BLAS3
1069- #pragma omp parallel for default( none ) shared( B_raw, A_raw, C_crs_raw, std::cout ) firstprivate( m_A )
1075+ #pragma omp parallel for default( none ) \
1076+ shared ( B_raw, A_raw, C_crs_raw, C_ccs_raw, std::cout ) \
1077+ firstprivate ( m_A )
10701078#endif
10711079 for ( size_t i = 0 ; i < m_A; i++ ) {
10721080 auto B_k = B_raw.col_start [ i ];
@@ -1086,48 +1094,65 @@ namespace grb {
10861094 }
10871095 if ( B_raw.row_index [ B_k ] == j ) {
10881096 cumul += 1 ;
1097+ C_ccs_raw.col_start [ j + 1 ] += 1 ;
10891098 }
10901099 }
10911100 C_crs_raw.col_start [ i + 1 ] = cumul;
10921101 }
10931102
10941103#ifdef _DEBUG
10951104 // Print the CRS prefix sum
1096- std::cout << " CRS prefix sum: " ;
1105+ std::cout << " before nCRS prefix sum: " ;
10971106 for ( size_t i = 0 ; i <= m_A; i++ ) {
10981107 std::cout << C_crs_raw.col_start [ i ] << " " ;
10991108 }
11001109 std::cout << " \n " ;
1110+ // Print the CCS prefix sum
1111+ std::cout << " before nCCS prefix sum: " ;
1112+ for ( size_t i = 0 ; i <= n_A; i++ ) {
1113+ std::cout << C_ccs_raw.col_start [ i ] << " " ;
1114+ }
1115+ std::cout << " \n " ;
11011116#endif
11021117
11031118 // Apply the prefix sum
11041119 for ( size_t i = 1 ; i <= m_A; i++ ) {
11051120 C_crs_raw.col_start [ i ] += C_crs_raw.col_start [ i - 1 ];
1106- C_ccs_raw.col_start [ i ] = C_crs_raw.col_start [ i ];
11071121 }
1122+ for ( size_t i = 1 ; i <= n_A; i++ ) {
1123+ C_ccs_raw.col_start [ i ] += C_ccs_raw.col_start [ i - 1 ];
1124+ }
1125+
1126+ #ifdef _DEBUG
1127+ // Print the CRS prefix sum
1128+ std::cout << " after nCRS prefix sum: " ;
1129+ for ( size_t i = 0 ; i <= m_A; i++ ) {
1130+ std::cout << C_crs_raw.col_start [ i ] << " " ;
1131+ }
1132+ std::cout << " \n " ;
1133+ // Print the CCS prefix sum
1134+ std::cout << " after nCCS prefix sum: " ;
1135+ for ( size_t i = 0 ; i <= n_A; i++ ) {
1136+ std::cout << C_ccs_raw.col_start [ i ] << " " ;
1137+ }
1138+ std::cout << " \n " ;
1139+ #endif
11081140
11091141 // Check if the number of nonzeros is greater than the capacity
1110- if ( C_crs_raw.col_start [ m_A ] > nzc || C_ccs_raw.col_start [ m_A ] > nzc ) {
1142+ if ( C_crs_raw.col_start [ m_A ] > nzc || C_ccs_raw.col_start [ n_A ] > nzc ) {
11111143#ifdef _DEBUG
11121144 std::cout << " Insufficient capacity detected for requested operation.\n "
1113- << " Requested " << C_crs_raw .col_start [ m_A ] << " nonzeros"
1145+ << " Requested " << C_ccs_raw .col_start [ m_A ] << " nonzeros"
11141146 << " but capacity is " << nzc << " \n " ;
11151147#endif
11161148 return MISMATCH;
11171149 }
11181150
1119- #ifdef _H_GRB_REFERENCE_OMP_BLAS3
1120- #pragma omp parallel for simd
1121- #endif
1122- for ( size_t i = 0 ; i < m_A; i++ ) {
1123- C_crs_raw.row_index [ i ] = C_ccs_raw.row_index [ i ] = 0 ;
1124- }
1125-
11261151 RC local_rc = rc;
11271152#ifdef _H_GRB_REFERENCE_OMP_BLAS3
11281153#pragma omp parallel default( none ) \
1129- shared ( C_ccs_raw, C_crs_raw, A_raw, B_raw, rc, std::cout ) \
1130- firstprivate ( local_rc, m_A, oper, A_identity, B_identity )
1154+ shared ( C_ccs_raw, C_crs_raw, A_raw, B_raw, rc, std::cout ) \
1155+ firstprivate ( local_rc, m_A, oper, A_identity, B_identity )
11311156#endif
11321157 {
11331158 size_t start_row = 0 ;
@@ -1144,6 +1169,7 @@ namespace grb {
11441169 const auto A_k_end = A_raw.col_start [ i + 1 ];
11451170 for ( auto A_k = A_k_start; A_k < A_k_end; ++A_k ) {
11461171 const auto j = A_raw.row_index [ A_k ];
1172+
11471173 while ( B_k < B_raw.col_start [ i + 1 ]
11481174 && B_raw.row_index [ B_k ] > j
11491175 ) {
@@ -1165,16 +1191,17 @@ namespace grb {
11651191
11661192 C_crs_raw.row_index [ C_k ] = j;
11671193 C_crs_raw.setValue ( C_k, c_val );
1168- C_ccs_raw.row_index [ C_k ] = i;
1169- C_ccs_raw.setValue ( C_k, c_val );
1194+
1195+ C_ccs_raw.row_index [ C_ccs_raw.col_start [ j ] ] = i;
1196+ C_ccs_raw.setValue ( C_ccs_raw.col_start [ j ], c_val );
11701197#ifdef _DEBUG
11711198 std::cout << " A( " + std::to_string ( i ) + " ;"
11721199 + std::to_string ( j ) + " ) = "
11731200 + std::to_string ( a_val ) + " \n " ;
11741201 std::cout << " B( " + std::to_string ( i ) + " ;"
11751202 + std::to_string ( j ) + " ) = "
11761203 + std::to_string ( b_val ) + " \n " ;
1177- std::cout << " C.crs ( " + std::to_string ( i ) + " ;"
1204+ std::cout << " C( " + std::to_string ( i ) + " ;"
11781205 + std::to_string ( j ) + " ) = "
11791206 + std::to_string ( c_val ) + " \n " ;
11801207#endif
0 commit comments