@@ -885,7 +885,7 @@ struct Factorizer {
885885 auto mColIt = smoothNumberValues.begin ();
886886 auto nColIt = smoothNumberKeys.begin ();
887887 const size_t rows = smoothNumberValues.size ();
888- GaussianEliminationResult result (rows );
888+ GaussianEliminationResult result (primes. size () );
889889
890890 for (size_t col = 0U ; col < primes.size (); ++col) {
891891 auto mRowIt = mColIt ;
@@ -896,63 +896,63 @@ struct Factorizer {
896896 for (size_t row = colPlus1; row < rows; ++row) {
897897 ++mRowIt ;
898898 ++nRowIt;
899- if ((*mRowIt )[col]) {
899+ const boost::dynamic_bitset<size_t > rowCopy = *mRowIt ;
900+ if (rowCopy[col]) {
900901 // Swapping matrix rows corresponds
901902 // with swapping factorized numbers.
902903 std::swap (*mColIt , *mRowIt );
903904 std::swap (*nColIt, *nRowIt);
905+
904906 // Mark this column as having a pivot.
905907 result.marks [col] = true ;
906- break ;
907- }
908- }
909908
910- if (result.marks [col]) {
911- // Pivot found, now eliminate entries in this column
912- const boost::dynamic_bitset<size_t > &cm = *mColIt ;
913- const BigInteger &cn = *nColIt;
914- auto emRowIt = mColIt ;
915- auto enRowIt = nColIt;
916- const size_t maxLcv = std::min (colPlus1 + CpuCount, rows);
917- for (unsigned cpu = colPlus1; cpu < maxLcv; ++cpu) {
918- ++emRowIt;
919- ++enRowIt;
920- dispatch.dispatch ([cpu, &cpuCount, &col, &rows, &cm, &cn, emRowIt, enRowIt]() -> bool {
921- // Notice that each thread updates rows with space increments of cpuCount,
922- // based on the same unchanged outer-loop row, and this covers the inner-loop set.
923- auto mrIt = emRowIt;
924- auto nrIt = enRowIt;
925- for (size_t row = cpu; ; row += cpuCount) {
926- boost::dynamic_bitset<size_t > &rm = *mrIt;
927- BigInteger &rn = *nrIt;
928- if (rm[col]) {
929- // XOR-ing factorization rows
930- // is like multiplying the numbers.
931- rm ^= cm;
932- rn *= cn;
909+ // Pivot found, now eliminate entries in this column
910+ const boost::dynamic_bitset<size_t > &cm = *mColIt ;
911+ const BigInteger &cn = *nColIt;
912+ auto emRowIt = smoothNumberValues.begin ();
913+ auto enRowIt = smoothNumberKeys.begin ();
914+ const size_t maxLcv = std::min ((size_t )CpuCount, rows);
915+ for (size_t cpu = 0U ; cpu < maxLcv; ++cpu) {
916+ dispatch.dispatch ([cpu, &cpuCount, &col, &rows, &cm, &cn, emRowIt, enRowIt]() -> bool {
917+ // Notice that each thread updates rows with space increments of cpuCount,
918+ // based on the same unchanged outer-loop row, and this covers the inner-loop set.
919+ auto mrIt = emRowIt;
920+ auto nrIt = enRowIt;
921+ for (size_t row = cpu; ; row += cpuCount) {
922+ boost::dynamic_bitset<size_t > &rm = *mrIt;
923+ BigInteger &rn = *nrIt;
924+ if ((row != col) && rm[col]) {
925+ // XOR-ing factorization rows
926+ // is like multiplying the numbers.
927+ rm ^= cm;
928+ rn *= cn;
929+ }
930+ if ((row + cpuCount) >= rows) {
931+ // This is the completion condition.
932+ return false ;
933+ }
934+ // Every row advance is staggered according to cpuCount.
935+ std::advance (mrIt, cpuCount);
936+ std::advance (nrIt, cpuCount);
933937 }
934- if ((row + cpuCount) >= rows) {
935- // This is the completion condition.
936- return false ;
937- }
938- // Every row advance is staggered according to cpuCount.
939- std::advance (mrIt, cpuCount);
940- std::advance (nrIt, cpuCount);
941- }
942- return false ;
943- });
944- // Next inner-loop row (all at once by dispatch).
938+ return false ;
939+ });
940+ // Next inner-loop row (all at once by dispatch).
941+ ++emRowIt;
942+ ++enRowIt;
943+ }
944+ // All dispatched work must complete.
945+ dispatch.finish ();
945946 }
946- // All dispatched work must complete.
947- dispatch.finish ();
948947 }
948+
949949 // Next outer-loop row
950950 ++mColIt ;
951951 ++nColIt;
952952 }
953953
954954 // Step 2: Identify free rows
955- for (size_t i = 0U ; i < rows ; i++) {
955+ for (size_t i = 0U ; i < primes. size () ; i++) {
956956 if (result.marks [i]) {
957957 continue ;
958958 }
0 commit comments