@@ -644,6 +644,7 @@ export class Crawler {
644644 if ( ! finished ) {
645645 if ( canceled ) {
646646 status = "canceled" ;
647+ await this . cleanupOnCancel ( ) ;
647648 } else if ( stopped ) {
648649 status = "done" ;
649650 logger . info ( "Crawl gracefully stopped on request" ) ;
@@ -1612,7 +1613,17 @@ self.__bx_behaviors.selectMainBehavior();
16121613
// Checks whether the crawl has been canceled and, if so, cleans up and exits.
//
// When `this.crawlState` is set and `crawlState.isCrawlCanceled()` resolves
// truthy, this awaits `cleanupOnCancel()` first, then
// `setStatusAndExit(ExitCodes.Success, "canceled")`, and returns true.
// In every other case (no crawl state, or not canceled) it returns false.
//
// NOTE(review): `return true` is only reached if `setStatusAndExit` returns
// control to the caller rather than terminating the process — its
// implementation is not visible here; confirm.
16131614 async checkCanceled ( ) {
16141615 if ( this . crawlState && ( await this . crawlState . isCrawlCanceled ( ) ) ) {
// Cleanup runs before the status/exit call so it completes prior to exit.
1616+ await this . cleanupOnCancel ( ) ;
16151617 await this . setStatusAndExit ( ExitCodes . Success , "canceled" ) ;
1618+ return true ;
1619+ }
1620+
1621+ return false ;
1622+ }
1623+
// Cancel-time cleanup hook: when `this.deduping` is truthy, awaits
// `crawlState.clearUncommitted()`; otherwise does nothing.
//
// Presumably this discards dedupe data registered for this crawl that was
// never committed to the main index — TODO confirm against the crawl-state
// implementation, which is not visible here.
//
// NOTE(review): unlike `checkCanceled`, `this.crawlState` is dereferenced here
// without a null guard; verify every caller guarantees it is initialized.
1624+ async cleanupOnCancel ( ) {
1625+ if ( this . deduping ) {
1626+ await this . crawlState . clearUncommitted ( ) ;
16161627 }
16171628 }
16181629
@@ -1659,8 +1670,7 @@ self.__bx_behaviors.selectMainBehavior();
16591670 return false ;
16601671 }
16611672
1662- if ( await this . crawlState . isCrawlCanceled ( ) ) {
1663- await this . setStatusAndExit ( ExitCodes . Success , "canceled" ) ;
1673+ if ( await this . checkCanceled ( ) ) {
16641674 return false ;
16651675 }
16661676
@@ -1728,6 +1738,9 @@ self.__bx_behaviors.selectMainBehavior();
17281738 await this . crawlState . addSourceWACZForDedupe ( filename ) ;
17291739 }
17301740 }
1741+ if ( this . deduping ) {
1742+ await this . crawlState . addUncommited ( ) ;
1743+ }
17311744
17321745 if ( POST_CRAWL_STATES . includes ( initState ) ) {
17331746 logger . info ( "crawl already finished, running post-crawl tasks" , {
@@ -1905,8 +1918,7 @@ self.__bx_behaviors.selectMainBehavior();
19051918 }
19061919
19071920 const generateFiles =
1908- ! this . params . dryRun &&
1909- ( ! this . interruptReason || this . finalExit || this . uploadAndDeleteLocal ) ;
1921+ ! this . params . dryRun && ( this . finalExit || this . uploadAndDeleteLocal ) ;
19101922
19111923 if (
19121924 ( this . params . generateCDX || this . params . generateWACZ ) &&
@@ -1949,13 +1961,18 @@ self.__bx_behaviors.selectMainBehavior();
19491961 }
19501962 }
19511963
1952- if ( this . finalExit && generateFiles && this . deduping ) {
1964+ // from here, actions that should happen on final crawler exit (not temp interrupt)
1965+ if ( ! this . finalExit ) {
1966+ return ;
1967+ }
1968+
1969+ if ( this . deduping ) {
19531970 // commit crawl data to main index
19541971 logger . info ( "Committing dedupe index" ) ;
19551972 await this . crawlState . commitDedupeDone ( ) ;
19561973 }
19571974
1958- if ( this . finalExit && generateFiles && this . params . saveProfile ) {
1975+ if ( this . params . saveProfile && generateFiles ) {
19591976 const resource = await this . browser . saveProfile (
19601977 this . params . saveProfile ,
19611978 this . storage ,
@@ -1966,7 +1983,7 @@ self.__bx_behaviors.selectMainBehavior();
19661983 }
19671984 }
19681985
1969- if ( this . params . waitOnDone && ( ! this . interruptReason || this . finalExit ) ) {
1986+ if ( this . params . waitOnDone ) {
19701987 this . done = true ;
19711988 logger . info ( "All done, waiting for signal..." ) ;
19721989 await this . crawlState . setStatus ( "done" ) ;
0 commit comments