Skip to content

Commit c7a64d8

Browse files
committed
Revert one of the previous fixes: we should always iterate over the nonzeroes of x, since x is guaranteed to be the vector with the fewest nonzeroes. The actual error was in the computation of the mask, which used y instead of x. Note that the other previous fix WAS correct, so there were two bugs in total. Also includes a minor code style fix.
1 parent 8463a86 commit c7a64d8

File tree

1 file changed

+9
-4
lines changed

1 file changed

+9
-4
lines changed

include/graphblas/nonblocking/blas1.hpp

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10398,6 +10398,9 @@ namespace grb {
1039810398
// internal namespace for implementation of grb::dot
1039910399
namespace internal {
1040010400

10401+
/**
10402+
* Invariant: x must not have more nonzeroes than y (equal counts are allowed)
10403+
*/
1040110404
template<
1040210405
Descriptor descr,
1040310406
#ifdef GRB_BOOLEAN_DISPATCHER
@@ -10432,6 +10435,7 @@ namespace grb {
1043210435
#else
1043310436
(void) upper_bound;
1043410437
#endif
10438+
assert( local_x.nonzeroes() <= local_y.nonzeroes() );
1043510439

1043610440
// get raw alias
1043710441
const InputType1 * __restrict__ a = internal::getRaw( x );
@@ -10450,8 +10454,7 @@ namespace grb {
1045010454

1045110455
// prepare registers
1045210456
for( size_t k = 0; k < AnyOp::blocksize; ++k, ++i ) {
10453-
mask[ k ] = already_dense_input_x ||
10454-
local_x.assigned( already_dense_input_y ? i : local_y.index( i ) );
10457+
mask[ k ] = already_dense_input_x || local_y.assigned(local_x.index( i ));
1045510458
}
1045610459

1045710460
// rewind
@@ -10463,7 +10466,7 @@ namespace grb {
1046310466
xx[ k ] = static_cast< typename AnyOp::D1 >(
1046410467
a[ ( already_dense_input_x ? i : local_x.index( i ) ) + lower_bound ] );
1046510468
yy[ k ] = static_cast< typename AnyOp::D2 >(
10466-
b[ ( already_dense_input_y ? i : local_y.index( i ) ) + lower_bound ] );
10469+
b[ ( already_dense_input_x ? i : local_x.index( i ) ) + lower_bound ] );
1046710470
}
1046810471
}
1046910472

@@ -10658,7 +10661,9 @@ namespace grb {
1065810661
already_dense_input_y, already_dense_input_x,
1065910662
array_reduced[ thread_id ],
1066010663
lower_bound, upper_bound,
10661-
local_y, local_x, x, y, local_y_nz,
10664+
local_y, local_x,
10665+
x, y,
10666+
local_y_nz,
1066210667
addMonoid, anyOp
1066310668
);
1066410669
}

0 commit comments

Comments
 (0)