import { axios } from "@pipedream/platform";
-import {
-  isObject, log, isNullOrUnDef,
-} from "./common/utils.mjs";

export default {
  type: "app",
  app: "scrapeless",
  methods: {
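    /**
     * Returns the base URL that every API path below is appended to.
     */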
    _baseUrl() {
-      return "https://api.scrapeless.com/api/v1";
+      return "https://scrapeless-nodes.norains.com/api/v1";
    },
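    /**
     * Builds the auth headers sent with every request, using the
     * connected account's API key.
     */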
    _headers() {
      return {
-        "x-api-token": `${this.$auth.api_key}`,
+        "x-api-key": `${this.$auth.api_key}`,
      };
    },
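    /**
     * Shared request helper (body elided in this diff); presumably wraps
     * axios with _baseUrl() and _headers().
     */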
    _makeRequest({
@@ -27,81 +24,40 @@ export default {
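    /**
     * Submits an asynchronous scrape job; the response is expected to
     * include a task ID that getScrapeResult() can poll later.
     */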
    submitScrapeJob(opts = {}) {
      return this._makeRequest({
        method: "POST",
-        path: "/scraper/request",
+        path: "/nodes/scraper/request",
        ...opts,
      });
    },
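    /**
     * Fetches the result of a previously submitted scrape job by its ID.
     */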
    getScrapeResult({ scrapeJobId }) {
      return this._makeRequest({
-        path: `/scraper/result/${scrapeJobId}`,
+        path: `/nodes/scraper/result/${scrapeJobId}`,
      });
    },
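    /**
     * Runs a SERP scrape in a single call; the deepserp endpoint appears
     * to respond synchronously, so no taskId polling is needed here.
     */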
    async scrapingApi({ submitData }) {
-      const path = "/scraper/request";
-      const requestWithSync = {
-        ...submitData,
-        async: true,
-      };
+      const path = "/nodes/deepserp";
      const res = await this._makeRequest({
        method: "POST",
        path,
-        data: requestWithSync,
+        data: submitData,
      });

-      if (res.data) {
-        return res.data;
-      }
-
-      if (res?.taskId) {
-        log("Waiting for scrape result...");
-
-        while (true) {
-          await new Promise((resolve) => setTimeout(resolve, 1000));
-          const result = await this.getScrapeResult({
-            scrapeJobId: res.taskId,
-          });
-
-          if (isObject(result) && Object.keys(result).length > 0) {
-            log("Scrape result received");
-            return result;
-          }
-
-          if (isNullOrUnDef(result)) {
-            log("Scrape result is undefined");
-            return result;
-          }
-        }
-      }
      return res;
    },
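    /**
     * Scrapes a page through the universal scraping (unlocker) endpoint
     * and returns the raw response.
     */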
    async universalScrapingApi({ submitData }) {
-      const path = "/unlocker/request";
+      const path = "/nodes/universal-scraping/unlocker";
      const res = await this._makeRequest({
        method: "POST",
        path,
        data: submitData,
      });
-
-      if (res.data) {
-        return res.data;
-      }
-
      return res;
    },
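    /**
     * Starts a crawl of submitData.url, bounded by submitData.limit
     * pages, and returns the endpoint's response as-is.
     */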
    async crawlerCrawl({ submitData }) {
-      const path = "/crawler/crawl";
-
-      const browserOptions = {
-        "proxy_country": "ANY",
-        "session_name": "Crawl",
-        "session_recording": true,
-        "session_ttl": 900,
-      };
+      const path = "/nodes/crawler/crawl";

      const data = {
        url: submitData.url,
        limit: submitData.limit,
-        browserOptions: browserOptions,
      };

      const res = await this._makeRequest({
@@ -110,74 +66,13 @@ export default {
        data,
      });

-      // get job id
-      if (res.id) {
-        log("Crawl job started");
-        return this.monitorJobStatus(res.id);
-      }
-
      return res;
    },
-    /**
-     * Monitor the status of a crawl job.
-     * @param {string} jobId - The ID of the crawl job.
-     * @param {number} [pollInterval=2] - The interval in seconds to poll for job status.
-     * @returns {Promise<Object>} - The status response of the crawl job.
-     */
-    async monitorJobStatus(jobId, pollInterval = 2) {
-      try {
-        while (true) {
-          let statusResponse = await this._makeRequest({
-            method: "GET",
-            path: `/crawler/crawl/${jobId}`,
-          });
-          log("Crawl job status: ", statusResponse.status);
-          if (statusResponse.status === "completed") {
-            if ("data" in statusResponse) {
-              let data = statusResponse.data;
-              while (typeof statusResponse === "object" && "next" in statusResponse) {
-                if (data.length === 0) break;
-                statusResponse = await this._makeRequest({
-                  method: "GET",
-                  path: statusResponse.next,
-                });
-                data = data.concat(statusResponse.data);
-              }
-              statusResponse.data = data;
-              return statusResponse;
-            } else {
-              throw new Error("Crawl job completed but no data was returned");
-            }
-          } else if ([
-            "active",
-            "paused",
-            "pending",
-            "queued",
-            "waiting",
-            "scraping",
-          ].includes(statusResponse.status)) {
-            pollInterval = Math.max(pollInterval, 2);
-            await new Promise((resolve) => setTimeout(resolve, pollInterval * 1000));
-          } else {
-            throw new Error(`Crawl job failed or was stopped. Status: ${statusResponse.status}`);
-          }
-        }
-      } catch (error) {
-        throw new Error(error.message);
-      }
-    },
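    /**
     * Scrapes a single URL via the crawler endpoint; errors are rethrown
     * with their message only.
     */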
    async crawlerScrape({ submitData }) {
-      const path = "/crawler/scrape";
-      const browserOptions = {
-        "proxy_country": "ANY",
-        "session_name": "Scrape",
-        "session_recording": true,
-        "session_ttl": 900,
-      };
+      const path = "/nodes/crawler/scrape";

      const data = {
        url: submitData.url,
-        browserOptions: browserOptions,
      };

      try {
@@ -186,51 +81,11 @@ export default {
18681 path,
18782 data,
18883 } ) ;
189-
190- if ( ! response . id ) {
191- throw new Error ( "Failed to start a scrape job" ) ;
192- }
193-
194- log ( "Scrape job started" ) ;
195-
196- let pollInterval = 2 ;
197-
198- while ( true ) {
199- const statusResponse = await this . checkScrapeStatus ( response . id ) ;
200- log ( "Scrape job status: " , statusResponse . status ) ;
201- if ( statusResponse . status !== "scraping" ) {
202- return statusResponse ;
203- }
204-
205- pollInterval = Math . max ( pollInterval , 2 ) ;
206- await new Promise ( ( resolve ) => setTimeout ( resolve , pollInterval * 1000 ) ) ;
207- }
208- } catch ( error ) {
209- throw new Error ( error . message ) ;
210- }
211- } ,
212-
213- /**
214- * Check the status of a crawl job.
215- * @param {string } id - The ID of the crawl job.
216- * @returns {Promise<Object> } - The status response of the crawl job.
217- */
218- async checkScrapeStatus ( id ) {
219- if ( ! id ) {
220- throw new Error ( "No scrape ID provided" ) ;
221- }
222- const url = `/crawler/scrape/${ id } ` ;
223- try {
224- const response = await this . _makeRequest ( {
225- method : "GET" ,
226- path : url ,
227- } ) ;
22884 return response ;
22985 } catch ( error ) {
23086 throw new Error ( error . message ) ;
23187 }
23288 } ,
233-
23489 } ,
23590
23691} ;
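
A minimal usage sketch (an assumption, not part of this diff) of how a Pipedream action might call scrapingApi() from this app file. The import path, action key, and submitData shape (actor/input) are hypothetical placeholders, not confirmed by the source.

import scrapeless from "../../scrapeless.app.mjs"; // hypothetical path

export default {
  key: "scrapeless-example-deepserp", // hypothetical action key
  name: "Example: Deep SERP Scrape",
  type: "action",
  version: "0.0.1",
  props: {
    scrapeless,
  },
  async run({ $ }) {
    // The submitData shape is a placeholder; consult the Scrapeless docs
    // for the real request schema of the deepserp endpoint.
    const response = await this.scrapeless.scrapingApi({
      submitData: {
        actor: "scraper.google.search", // hypothetical actor name
        input: {
          q: "pipedream",
        },
      },
    });
    $.export("$summary", "Retrieved deep SERP results");
    return response;
  },
};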