@@ -411,11 +411,11 @@ export class RedisDedupeIndex {
411411 }
412412
413413 // COUNT STATS
414- incrDeduped ( pipe : ChainableCommander , key : string , value : number ) {
414+ incrDeduped ( pipe : ChainableCommander , key : string , value : number , count = 1 ) {
415415 if ( value > 0 ) {
416416 pipe . hincrby ( key , "conservedSize" , value ) ;
417417 }
418- pipe . hincrby ( key , "dupeUrls" , 1 ) ;
418+ pipe . hincrby ( key , "dupeUrls" , count ) ;
419419 }
420420
421421 incrTotalUrls ( pipe : ChainableCommander , key : string ) {
@@ -507,7 +507,7 @@ export class RedisDedupeIndex {
507507 }
508508
509509 async matchRevisitSize ( hash : string , origSize : number ) {
510- const incrMap : Record < string , number > = { } ;
510+ const incrMap : Record < string , { size : number ; count : number } > = { } ;
511511
512512 const length = 25 ;
513513 let start = 0 ;
@@ -521,7 +521,13 @@ export class RedisDedupeIndex {
521521
522522 for ( const entry of sizeEntries ) {
523523 const { size, crawlId } = JSON . parse ( entry ) ;
524- incrMap [ crawlId ] = ( incrMap [ crawlId ] || 0 ) + ( origSize - size ) ;
524+ let res = incrMap [ crawlId ] ;
525+ if ( ! res ) {
526+ res = { size : 0 , count : 0 } ;
527+ incrMap [ crawlId ] = res ;
528+ }
529+ res . size += origSize - size ;
530+ res . count += 1 ;
525531 }
526532
527533 if ( sizeEntries . length < length ) {
@@ -532,9 +538,9 @@ export class RedisDedupeIndex {
532538
533539 const pipe = this . dedupeRedis . pipeline ( ) ;
534540
535- for ( const [ crawlId , value ] of Object . entries ( incrMap ) ) {
536- this . incrDeduped ( pipe , `h:${ crawlId } :counts` , value ) ;
537- this . incrDeduped ( pipe , DUPE_ALL_COUNTS , value ) ;
541+ for ( const [ crawlId , { size , count } ] of Object . entries ( incrMap ) ) {
542+ this . incrDeduped ( pipe , `h:${ crawlId } :counts` , size , count ) ;
543+ this . incrDeduped ( pipe , DUPE_ALL_COUNTS , size , count ) ;
538544 }
539545
540546 pipe . del ( `rev:${ hash } ` ) ;
0 commit comments