4343import org .archive .util .IdleBarrier ;
4444import org .archive .util .Recorder ;
4545import org .eclipse .jetty .client .Result ;
46+ import org .json .JSONException ;
47+ import org .json .JSONObject ;
4648import org .springframework .context .ApplicationEventPublisher ;
4749
4850import java .io .IOException ;
5759import java .util .*;
5860import java .util .concurrent .ConcurrentHashMap ;
5961import java .util .concurrent .Semaphore ;
62+ import java .util .concurrent .atomic .AtomicLong ;
6063import java .util .stream .Stream ;
6164
6265import static java .lang .System .Logger .Level .ERROR ;
@@ -80,6 +83,7 @@ public class Browser extends Processor {
8083 protected final Map <String , BrowserPage > pages = new ConcurrentHashMap <>();
8184 protected final Map <BrowsingContext .Context , String > pageIdsByContext = new ConcurrentHashMap <>();
8285 protected final ProcessorChain extractorChain = new ProcessorChain ();
86+ protected final AtomicLong subresourcesRecorded = new AtomicLong ();
8387 protected List <Behavior > behaviors ;
8488 protected String executable ;
8589 protected List <String > options = List .of ("--headless" );
@@ -137,6 +141,29 @@ public void innerProcess(CrawlURI curi) {
137141 }
138142 }
139143
144+ @ Override
145+ protected JSONObject toCheckpointJson () throws JSONException {
146+ return super .toCheckpointJson ().put ("subresourcesRecorded" , subresourcesRecorded .get ());
147+ }
148+
149+ @ Override
150+ protected void fromCheckpointJson (JSONObject json ) throws JSONException {
151+ super .fromCheckpointJson (json );
152+ subresourcesRecorded .set (json .getLong ("subresourcesRecorded" ));
153+ }
154+
155+ @ Override
156+ public String report () {
157+ StringBuilder builder = new StringBuilder ();
158+ builder .append (super .report ());
159+ builder .append (" Pages visited: " ).append (getURICount ()).append ("\n " );
160+ builder .append (" Subresources recorded: " ).append (subresourcesRecorded .get ()).append ("\n " );
161+ for (var behavior : behaviors ) {
162+ builder .append (behavior .report ());
163+ }
164+ return builder .toString ();
165+ }
166+
140167 private void visit (CrawlURI curi ) {
141168 String pageId = UUID .randomUUID ().toString ();
142169 var tab = webdriver .browsingContext ().create (BrowsingContext .CreateType .tab ).context ();
@@ -398,6 +425,7 @@ public void onComplete(Result result) {
398425 if (recordingFailed ) {
399426 curi .setFetchStatus (FetchStatusCodes .S_RUNTIME_EXCEPTION );
400427 } else {
428+ subresourcesRecorded .incrementAndGet ();
401429 curi .getOverlayNames (); // for sideeffect of creating the overlayNames list
402430
403431 Frontier frontier = crawlController .getFrontier ();
0 commit comments