import org.archive.modules.fetcher.DefaultServerCache;
import org.archive.modules.fetcher.FetchHTTP2;
import org.archive.net.UURIFactory;
import org.archive.url.URIException;
import org.archive.util.Recorder;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.condition.EnabledIfSystemProperty;
import org.junit.jupiter.api.io.TempDir;

import java.net.InetSocketAddress;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import static java.lang.System.Logger.Level.DEBUG;
import static org.junit.jupiter.api.Assertions.*;
27+ @ EnabledIfSystemProperty (named = "runBrowserTests" , matches = "true" )
2628class BrowserTest {
2729 private static final System .Logger logger = System .getLogger (BrowserTest .class .getName ());
2830
2931 private static HttpServer httpServer ;
30-
32+ private static FetchHTTP2 fetcher ;
33+ private static Browser browser ;
34+ private static String baseUrl ;
35+ private static ArrayList <CrawlURI > subrequests ;
36+ private static CrawlController crawlController ;
37+ private Set <Recorder > recorders = new HashSet <>();
3138 @ TempDir
3239 Path tempDir ;
3340
3441 @ Test
35- @ EnabledIfSystemProperty ( named = "runBrowserTests" , matches = "true" )
42+ @ Disabled
3643 public void test () throws IOException , InterruptedException {
37- String url = "http://" + httpServer .getAddress ().getAddress ().getHostAddress () + ":" +
38- httpServer .getAddress ().getPort () + "/" ;
39- var fetcher = new FetchHTTP2 (new DefaultServerCache (), null );
40- fetcher .setUserAgentProvider (new CrawlMetadata ());
41- fetcher .start ();
42- try {
43- var crawlController = new CrawlController ();
44- FetchChain fetchChain = new FetchChain ();
45- fetchChain .setProcessors (List .of ());
46- crawlController .setFetchChain (fetchChain );
47-
48- var subrequests = new ArrayList <CrawlURI >();
49-
50- DispositionChain dispositionChain = new DispositionChain ();
51- dispositionChain .setProcessors (List .of (new Processor () {
52- @ Override
53- protected boolean shouldProcess (CrawlURI uri ) {
54- return true ;
55- }
44+ CrawlURI crawlURI = newCrawlURI (baseUrl );
45+ fetcher .process (crawlURI );
46+ assertEquals (200 , crawlURI .getFetchStatus ());
47+ browser .innerProcess (crawlURI );
5648
57- @ Override
58- protected void innerProcess (CrawlURI uri ) throws InterruptedException {
59- subrequests .add (uri );
60- }
61- }));
62- crawlController .setDispositionChain (dispositionChain );
63- crawlController .getScratchDir ().setPath (tempDir .toString ());
64- var browserProcessor = new Browser (fetcher , crawlController , event -> {}, null );
65- try {
66- browserProcessor .start ();
67-
68- CrawlURI crawlURI = new CrawlURI (UURIFactory .getInstance (url ));
69- crawlURI .setRecorder (new Recorder (tempDir .toFile (), "fetcher" ));
70- fetcher .process (crawlURI );
71- assertEquals (200 , crawlURI .getFetchStatus ());
72- browserProcessor .innerProcess (crawlURI );
73-
74- var outLinks = new ArrayList <>(crawlURI .getOutLinks ());
75- assertEquals ("/link" , outLinks .get (0 ).getUURI ().getPath ());
76- assertTrue (crawlURI .getAnnotations ().contains ("browser" ));
77-
78- logger .log (DEBUG , "Subrequests: {0}" , subrequests );
79- } finally {
80- browserProcessor .stop ();
81- }
82- } finally {
83- fetcher .stop ();
49+ var outLinks = new ArrayList <>(crawlURI .getOutLinks ());
50+ assertEquals ("/link" , outLinks .get (0 ).getUURI ().getPath ());
51+ assertTrue (crawlURI .getAnnotations ().contains ("browser" ));
52+
53+ logger .log (DEBUG , "Subrequests: {0}" , subrequests );
54+ }
55+
56+ @ Test
57+ public void testDownload () throws IOException , InterruptedException {
58+ CrawlURI crawlURI = newCrawlURI (baseUrl + "download.bin" );
59+ fetcher .process (crawlURI );
60+ assertEquals (200 , crawlURI .getFetchStatus ());
61+ browser .innerProcess (crawlURI );
62+ }
63+
64+ private CrawlURI newCrawlURI (String uri ) throws URIException {
65+ CrawlURI curi = new CrawlURI (UURIFactory .getInstance (uri ));
66+ Recorder recorder = new Recorder (tempDir .toFile (), "fetcher" );
67+ recorders .add (recorder );
68+ curi .setRecorder (recorder );
69+ return curi ;
70+ }
71+
72+ @ BeforeEach
73+ void setUp () {
74+ crawlController .getScratchDir ().setPath (tempDir .toString ());
75+ }
76+
77+ @ AfterEach
78+ void tearDown () {
79+ subrequests .clear ();
80+ for (Recorder recorder : recorders ) {
81+ recorder .cleanup ();
8482 }
8583 }
8684
8785 @ BeforeAll
86+ static void setUpAll () throws Exception {
87+ startHttpServer ();
88+ startProcessors ();
89+ }
90+
8891 static void startHttpServer () throws IOException {
8992 httpServer = HttpServer .create (new InetSocketAddress (InetAddress .getLoopbackAddress (), 0 ), -1 );
9093 httpServer .createContext ("/" , exchange -> {
@@ -101,6 +104,10 @@ static void startHttpServer() throws IOException {
101104 body = "body { color: red; background: url(bg.jpg); }" ;
102105 contentType = "text/css" ;
103106 }
107+ case "/download.bin" -> {
108+ body = "sample-download-file" ;
109+ contentType = "application/octet-stream" ;
110+ }
104111 default -> status = 404 ;
105112 }
106113 exchange .getResponseHeaders ().add ("Content-Type" , contentType );
@@ -109,11 +116,43 @@ static void startHttpServer() throws IOException {
109116 exchange .close ();
110117 });
111118 httpServer .start ();
119+ baseUrl = "http://" + httpServer .getAddress ().getAddress ().getHostAddress () + ":" +
120+ httpServer .getAddress ().getPort () + "/" ;
121+ }
122+
123+ static void startProcessors () {
124+ fetcher = new FetchHTTP2 (new DefaultServerCache (), null );
125+ fetcher .setUserAgentProvider (new CrawlMetadata ());
126+ fetcher .start ();
127+ crawlController = new CrawlController ();
128+ FetchChain fetchChain = new FetchChain ();
129+ fetchChain .setProcessors (List .of ());
130+ crawlController .setFetchChain (fetchChain );
131+
132+ subrequests = new ArrayList <CrawlURI >();
133+
134+ DispositionChain dispositionChain = new DispositionChain ();
135+ dispositionChain .setProcessors (List .of (new Processor () {
136+ @ Override
137+ protected boolean shouldProcess (CrawlURI uri ) {
138+ return true ;
139+ }
140+
141+ @ Override
142+ protected void innerProcess (CrawlURI uri ) throws InterruptedException {
143+ subrequests .add (uri );
144+ }
145+ }));
146+ crawlController .setDispositionChain (dispositionChain );
147+ browser = new Browser (fetcher , crawlController , event -> {}, null );
148+ browser .start ();
112149 }
113150
114151 @ AfterAll
115- static void stopHttpServer () {
152+ static void tearDownAll () {
116153 if (httpServer != null ) httpServer .stop (0 );
154+ if (browser != null ) browser .stop ();
155+ if (fetcher != null ) fetcher .stop ();
117156 }
118157
119158}