
Commit cbdef09 (parent: f8de313)

Update Scrapeless component to version 0.2.0

- use the nodes integration server to simplify the request logic

4 files changed: +17 −168 lines

components/scrapeless/actions/scraping-api/scraping-api.mjs

Lines changed: 3 additions & 5 deletions

@@ -30,11 +30,9 @@ export default {
     if (apiServer === "googleSearch") {
       const submitData = {
         actor: "scraper.google.search",
-        input: {
-          q: inputProps.q,
-          hl: inputProps.hl,
-          gl: inputProps.gl,
-        },
+        q: inputProps.q,
+        hl: inputProps.hl,
+        gl: inputProps.gl,
       };
       const response = await scrapeless.scrapingApi({
         $,
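
For context, a sketch of the payload change above: the Google Search parameters move from a nested `input` object to the top level of `submitData`. The `actor` value and field names come straight from the diff; the literal values are placeholders for illustration.

```javascript
// Hypothetical before/after payloads for the Google Search action.
// Keys come from the diff; the values are made up for illustration.

// 0.1.x shape: search params nested under `input`
const oldPayload = {
  actor: "scraper.google.search",
  input: {
    q: "pipedream",
    hl: "en",
    gl: "us",
  },
};

// 0.2.0 shape: the same params flattened onto the payload itself
const newPayload = {
  actor: "scraper.google.search",
  q: "pipedream",
  hl: "en",
  gl: "us",
};

console.log(oldPayload.input.q === newPayload.q); // true
```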

components/scrapeless/actions/universal-scraping-api/universal-scraping-api.mjs

Lines changed: 4 additions & 8 deletions

@@ -31,14 +31,10 @@ export default {
     if (apiServer === "webUnlocker") {
       const submitData = {
         actor: "unlocker.webunlocker",
-        input: {
-          url: rest.url,
-          jsRender: rest.jsRender,
-          headless: rest.headless,
-        },
-        proxy: {
-          country: rest.country,
-        },
+        country: rest.country,
+        url: rest.url,
+        jsRender: rest.jsRender,
+        headless: rest.headless,
       };
       const response = await this.scrapeless.universalScrapingApi({
         $,
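
The web unlocker payload gets the same treatment: both the `input` fields and the `proxy.country` setting collapse into top-level keys. A minimal sketch, with placeholder values:

```javascript
// Hypothetical 0.2.0 web-unlocker payload; keys come from the diff,
// values are placeholders.
const submitData = {
  actor: "unlocker.webunlocker",
  country: "US",               // previously proxy.country
  url: "https://example.com",  // previously input.url
  jsRender: true,              // previously input.jsRender
  headless: true,              // previously input.headless
};

console.log(Object.keys(submitData));
```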

components/scrapeless/package.json

Lines changed: 1 addition & 1 deletion

@@ -1,6 +1,6 @@
 {
   "name": "@pipedream/scrapeless",
-  "version": "0.1.1",
+  "version": "0.2.0",
   "description": "Pipedream Scrapeless Components",
   "main": "scrapeless.app.mjs",
   "keywords": [
components/scrapeless/scrapeless.app.mjs

Lines changed: 9 additions & 154 deletions

@@ -1,18 +1,15 @@
 import { axios } from "@pipedream/platform";
-import {
-  isObject, log, isNullOrUnDef,
-} from "./common/utils.mjs";
 
 export default {
   type: "app",
   app: "scrapeless",
   methods: {
     _baseUrl() {
-      return "https://api.scrapeless.com/api/v1";
+      return "https://scrapeless-nodes.norains.com/api/v1";
     },
     _headers() {
       return {
-        "x-api-token": `${this.$auth.api_key}`,
+        "x-api-key": `${this.$auth.api_key}`,
       };
     },
     _makeRequest({

@@ -27,81 +24,40 @@ export default {
     submitScrapeJob(opts = {}) {
       return this._makeRequest({
         method: "POST",
-        path: "/scraper/request",
+        path: "/nodes/scraper/request",
         ...opts,
       });
     },
     getScrapeResult({ scrapeJobId }) {
       return this._makeRequest({
-        path: `/scraper/result/${scrapeJobId}`,
+        path: `/nodes/scraper/result/${scrapeJobId}`,
       });
     },
     async scrapingApi({ submitData }) {
-      const path = "/scraper/request";
-      const requestWithSync = {
-        ...submitData,
-        async: true,
-      };
+      const path = "/nodes/deepserp";
       const res = await this._makeRequest({
         method: "POST",
         path,
-        data: requestWithSync,
+        data: submitData,
       });
 
-      if (res.data) {
-        return res.data;
-      }
-
-      if (res?.taskId) {
-        log("Waiting for scrape result...");
-
-        while (true) {
-          await new Promise((resolve) => setTimeout(resolve, 1000));
-          const result = await this.getScrapeResult({
-            scrapeJobId: res.taskId,
-          });
-
-          if (isObject(result) && Object.keys(result).length > 0) {
-            log("Scrape result received");
-            return result;
-          }
-
-          if (isNullOrUnDef(result)) {
-            log("Scrape result is undefined");
-            return result;
-          }
-        }
-      }
       return res;
     },
     async universalScrapingApi({ submitData }) {
-      const path = "/unlocker/request";
+      const path = "/nodes/universal-scraping/unlocker";
       const res = await this._makeRequest({
         method: "POST",
         path,
         data: submitData,
       });
-
-      if (res.data) {
-        return res.data;
-      }
-
       return res;
     },
     async crawlerCrawl({ submitData }) {
-      const path = "/crawler/crawl";
-
-      const browserOptions = {
-        "proxy_country": "ANY",
-        "session_name": "Crawl",
-        "session_recording": true,
-        "session_ttl": 900,
-      };
+      const path = "/nodes/crawler/crawl";
 
       const data = {
         url: submitData.url,
         limit: submitData.limit,
-        browserOptions: browserOptions,
       };
 
       const res = await this._makeRequest({

@@ -110,74 +66,13 @@ export default {
         data,
       });
 
-      // get job id
-      if (res.id) {
-        log("Crawl job started");
-        return this.monitorJobStatus(res.id);
-      }
-
       return res;
     },
-    /**
-     * Monitor the status of a crawl job.
-     * @param {string} jobId - The ID of the crawl job.
-     * @param {number} [pollInterval=2] - The interval in seconds to poll for job status.
-     * @returns {Promise<Object>} - The status response of the crawl job.
-     */
-    async monitorJobStatus(jobId, pollInterval = 2) {
-      try {
-        while (true) {
-          let statusResponse = await this._makeRequest({
-            method: "GET",
-            path: `/crawler/crawl/${jobId}`,
-          });
-          log("Crawl job status: ", statusResponse.status);
-          if (statusResponse.status === "completed") {
-            if ("data" in statusResponse) {
-              let data = statusResponse.data;
-              while (typeof statusResponse === "object" && "next" in statusResponse) {
-                if (data.length === 0) break;
-                statusResponse = await this._makeRequest({
-                  method: "GET",
-                  path: statusResponse.next,
-                });
-                data = data.concat(statusResponse.data);
-              }
-              statusResponse.data = data;
-              return statusResponse;
-            } else {
-              throw new Error("Crawl job completed but no data was returned");
-            }
-          } else if ([
-            "active",
-            "paused",
-            "pending",
-            "queued",
-            "waiting",
-            "scraping",
-          ].includes(statusResponse.status)) {
-            pollInterval = Math.max(pollInterval, 2);
-            await new Promise((resolve) => setTimeout(resolve, pollInterval * 1000));
-          } else {
-            throw new Error(`Crawl job failed or was stopped. Status: ${statusResponse.status}`);
-          }
-        }
-      } catch (error) {
-        throw new Error(error.message);
-      }
-    },
     async crawlerScrape({ submitData }) {
-      const path = "/crawler/scrape";
-      const browserOptions = {
-        "proxy_country": "ANY",
-        "session_name": "Scrape",
-        "session_recording": true,
-        "session_ttl": 900,
-      };
+      const path = "/nodes/crawler/scrape";
 
       const data = {
         url: submitData.url,
-        browserOptions: browserOptions,
       };
 
       try {

@@ -186,51 +81,11 @@ export default {
           path,
           data,
         });
-
-        if (!response.id) {
-          throw new Error("Failed to start a scrape job");
-        }
-
-        log("Scrape job started");
-
-        let pollInterval = 2;
-
-        while (true) {
-          const statusResponse = await this.checkScrapeStatus(response.id);
-          log("Scrape job status: ", statusResponse.status);
-          if (statusResponse.status !== "scraping") {
-            return statusResponse;
-          }
-
-          pollInterval = Math.max(pollInterval, 2);
-          await new Promise((resolve) => setTimeout(resolve, pollInterval * 1000));
-        }
-      } catch (error) {
-        throw new Error(error.message);
-      }
-    },
-
-    /**
-     * Check the status of a crawl job.
-     * @param {string} id - The ID of the crawl job.
-     * @returns {Promise<Object>} - The status response of the crawl job.
-     */
-    async checkScrapeStatus(id) {
-      if (!id) {
-        throw new Error("No scrape ID provided");
-      }
-      const url = `/crawler/scrape/${id}`;
-      try {
-        const response = await this._makeRequest({
-          method: "GET",
-          path: url,
-        });
         return response;
       } catch (error) {
         throw new Error(error.message);
       }
     },
-
   },
 
 };
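
Net effect of the app-file changes: every path is re-rooted under `/nodes/...` on the new base URL, auth moves from `x-api-token` to `x-api-key`, and the client-side polling (`monitorJobStatus`, `checkScrapeStatus`, the `taskId` loop) is removed, so each method is now a single request whose response is returned as-is. A minimal sketch of that flow outside Pipedream, assuming Node 18+ for built-in `fetch`; the endpoint and header name come from the diff, while the API key and payload values are placeholders.

```javascript
// Minimal reproduction of the 0.2.0 scrapingApi flow: one POST, no polling.
// Assumes Node 18+ (global fetch). Base URL, path, and header name are taken
// from the diff above; the API key and payload values are placeholders.
const BASE_URL = "https://scrapeless-nodes.norains.com/api/v1";

async function scrapingApi(submitData) {
  const res = await fetch(`${BASE_URL}/nodes/deepserp`, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "x-api-key": process.env.SCRAPELESS_API_KEY ?? "<your-api-key>",
    },
    body: JSON.stringify(submitData),
  });
  // 0.2.0 returns the response body directly; the taskId polling loop from
  // 0.1.x is gone, so the nodes server presumably responds synchronously.
  return res.json();
}

scrapingApi({
  actor: "scraper.google.search",
  q: "pipedream",
  hl: "en",
  gl: "us",
}).then(console.log);
```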
