@@ -2,7 +2,9 @@ package wayback
22
33import (
44 "context"
5+ "errors"
56 "fmt"
7+
68 jsoniter "github.com/json-iterator/go"
79 "github.com/lc/gau/v2/pkg/httpclient"
810 "github.com/lc/gau/v2/pkg/providers"
@@ -36,12 +38,7 @@ type waybackResult [][]string
3638// Fetch fetches all urls for a given domain and sends them to a channel.
3739// It returns an error should one occur.
3840func (c * Client ) Fetch (ctx context.Context , domain string , results chan string ) error {
39- pages , err := c .getPagination (domain )
40- if err != nil {
41- return fmt .Errorf ("failed to fetch wayback pagination: %s" , err )
42- }
43-
44- for page := uint (0 ); page < pages ; page ++ {
41+ for page := uint (0 ); ; page ++ {
4542 select {
4643 case <- ctx .Done ():
4744 return nil
@@ -51,9 +48,11 @@ func (c *Client) Fetch(ctx context.Context, domain string, results chan string)
5148 // make HTTP request
5249 resp , err := httpclient .MakeRequest (c .config .Client , apiURL , c .config .MaxRetries , c .config .Timeout )
5350 if err != nil {
51+ if errors .Is (err , httpclient .ErrBadRequest ) {
52+ return nil
53+ }
5454 return fmt .Errorf ("failed to fetch wayback results page %d: %s" , page , err )
5555 }
56-
5756 var result waybackResult
5857 if err = jsoniter .Unmarshal (resp , & result ); err != nil {
5958 return fmt .Errorf ("failed to decode wayback results for page %d: %s" , page , err )
@@ -72,7 +71,6 @@ func (c *Client) Fetch(ctx context.Context, domain string, results chan string)
7271 }
7372 }
7473 }
75- return nil
7674}
7775
7876// formatURL returns a formatted URL for the Wayback API
@@ -82,25 +80,7 @@ func (c *Client) formatURL(domain string, page uint) string {
8280 }
8381 filterParams := c .filters .GetParameters (true )
8482 return fmt .Sprintf (
85- "https://web.archive.org/cdx/search/cdx?url=%s/*&output=json&collapse=urlkey&fl=original&page=%d" ,
83+ "https://web.archive.org/cdx/search/cdx?url=%s/*&output=json&collapse=urlkey&fl=original&pageSize=100&page=%d" ,
8684 domain , page ,
8785 ) + filterParams
8886}
89-
90- // getPagination returns the number of pages for Wayback
91- func (c * Client ) getPagination (domain string ) (uint , error ) {
92- url := fmt .Sprintf ("%s&showNumPages=true" , c .formatURL (domain , 0 ))
93- resp , err := httpclient .MakeRequest (c .config .Client , url , c .config .MaxRetries , c .config .Timeout )
94-
95- if err != nil {
96- return 0 , err
97- }
98-
99- var paginationResult uint
100-
101- if err = jsoniter .Unmarshal (resp , & paginationResult ); err != nil {
102- return 0 , err
103- }
104-
105- return paginationResult , nil
106- }
0 commit comments