Skip to content

Commit d36bf8e

Browse files
committed
feat(scrapeless): update actions
- Fix request URLs for `submit-scrape-job` and `get-scrape-result` actions
- Refactor `submit-scrape-job` input props to align with Scrapeless's official API parameters
- Fix issue with retrieving additional props asynchronously
1 parent fa1e7c7 commit d36bf8e

File tree

7 files changed: +32 -178 lines changed

components/scrapeless/actions/crawler/crawler.mjs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ export default {
44
key: "scrapeless-crawler",
55
name: "Crawler",
66
description: "Crawl any website at scale and say goodbye to blocks. [See the documentation](https://apidocs.scrapeless.com/api-17509010).",
7-
version: "0.0.1",
7+
version: "0.0.2",
88
type: "action",
99
props: {
1010
scrapeless,
@@ -62,7 +62,7 @@ export default {
6262
throw new Error(response?.error || "Failed to retrieve crawling results");
6363
}
6464
},
65-
async additionalProps() {
65+
additionalProps() {
6666
const { apiServer } = this;
6767

6868
const props = {};

components/scrapeless/actions/get-scrape-result/get-scrape-result.mjs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ export default {
44
key: "scrapeless-get-scrape-result",
55
name: "Get Scrape Result",
66
description: "Retrieve the result of a completed scraping job. [See the documentation](https://apidocs.scrapeless.com/api-11949853)",
7-
version: "0.0.2",
7+
version: "0.0.3",
88
type: "action",
99
props: {
1010
scrapeless,

components/scrapeless/actions/scraping-api/scraping-api.mjs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ export default {
44
key: "scrapeless-scraping-api",
55
name: "Scraping API",
66
description: "Endpoints for fresh, structured data from 100+ popular sites. [See the documentation](https://apidocs.scrapeless.com/api-12919045).",
7-
version: "0.0.1",
7+
version: "0.0.2",
88
type: "action",
99
props: {
1010
scrapeless,
@@ -91,7 +91,7 @@ export default {
9191
}
9292

9393
},
94-
async additionalProps() {
94+
additionalProps() {
9595
const { apiServer } = this;
9696

9797
const props = {};
components/scrapeless/actions/submit-scrape-job/submit-scrape-job.mjs

Lines changed: 20 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,76 +1,68 @@
1-
import { ConfigurationError } from "@pipedream/platform";
2-
import { ACTOR_OPTIONS } from "../../common/constants.mjs";
3-
import { parseObject } from "../../common/utils.mjs";
1+
import { COUNTRY_OPTIONS } from "../../common/constants.mjs";
2+
import { log } from "../../common/utils.mjs";
43
import scrapeless from "../../scrapeless.app.mjs";
54

65
export default {
76
key: "scrapeless-submit-scrape-job",
87
name: "Submit Scrape Job",
98
description: "Submit a new web scraping job with specified target URL and extraction rules. [See the documentation](https://apidocs.scrapeless.com/api-11949852)",
10-
version: "0.0.2",
9+
version: "0.0.3",
1110
type: "action",
1211
props: {
1312
scrapeless,
1413
actor: {
1514
type: "string",
1615
label: "Actor",
16+
default: "scraper.shopee",
1717
description: "The actor to use for the scrape job. This can be a specific user or a system account.",
18-
options: ACTOR_OPTIONS,
1918
},
2019
inputUrl: {
2120
type: "string",
2221
label: "Input URL",
2322
description: "Target URL to scrape. This is the URL of the web page you want to extract data from.",
24-
optional: true,
2523
},
2624
proxyCountry: {
2725
type: "string",
2826
label: "Proxy Country",
2927
description: "The country to route the request through. This can help in bypassing geo-restrictions.",
30-
optional: true,
31-
},
32-
additionalInput: {
33-
type: "object",
34-
label: "Additional Input",
35-
description: "Additional input parameters if you need to pass a specific configuration based on the actor. [See the documentation](https://apidocs.scrapeless.com/) for further details.",
36-
optional: true,
28+
default: "ANY",
29+
options: COUNTRY_OPTIONS.map((country) => ({
30+
label: country.label,
31+
value: country.value,
32+
})),
3733
},
3834
asyncMode: {
3935
type: "boolean",
4036
label: "Async Mode",
37+
default: true,
4138
description: "Whether to run the scrape job in asynchronous mode. If set to true, the job will be processed in the background.",
4239
},
4340
},
4441
async run({ $ }) {
4542
try {
4643
const data = {
4744
actor: this.actor,
48-
input: parseObject(this.additionalInput),
49-
};
50-
51-
if (this.asyncMode) {
52-
data.async = this.asyncMode;
53-
}
54-
if (this.inputUrl) {
55-
data.input.url = this.inputUrl;
56-
}
57-
if (this.proxyCountry) {
58-
data.proxy = {
45+
input: {
46+
url: this.inputUrl,
47+
},
48+
proxy: {
5949
country: this.proxyCountry,
60-
};
61-
}
50+
},
51+
async: this.asyncMode,
52+
};
6253

6354
const response = await this.scrapeless.submitScrapeJob({
6455
$,
6556
data,
6657
});
58+
log(response);
6759

6860
$.export("$summary", this.asyncMode
6961
? `Successfully submitted scrape job with ID: ${response.taskId}`
7062
: "Successfully scraped the target configuration.");
7163
return response;
72-
} catch ({ response }) {
73-
throw new ConfigurationError(response.data.message);
64+
} catch (error) {
65+
throw new Error(error.message);
7466
}
7567
},
7668
};

components/scrapeless/actions/universal-scraping-api/universal-scraping-api.mjs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
import scrapeless from "../../scrapeless.app.mjs";
2-
import { countryOptions } from "../../common/constants.mjs";
2+
import { COUNTRY_OPTIONS } from "../../common/constants.mjs";
33

44
export default {
55
key: "scrapeless-universal-scraping-api",
66
name: "Universal Scraping API",
77
description: "Access any website at scale and say goodbye to blocks. [See the documentation](https://apidocs.scrapeless.com/api-11949854).",
8-
version: "0.0.1",
8+
version: "0.0.2",
99
type: "action",
1010
props: {
1111
scrapeless,
@@ -46,7 +46,7 @@ export default {
4646
return response;
4747
}
4848
},
49-
async additionalProps() {
49+
additionalProps() {
5050
const { apiServer } = this;
5151

5252
const props = {};
@@ -74,7 +74,7 @@ export default {
7474
type: "string",
7575
label: "Country",
7676
default: "ANY",
77-
options: countryOptions.map((country) => ({
77+
options: COUNTRY_OPTIONS.map((country) => ({
7878
label: country.label,
7979
value: country.value,
8080
})),

components/scrapeless/common/constants.mjs

Lines changed: 1 addition & 139 deletions
Original file line numberDiff line numberDiff line change
@@ -1,142 +1,4 @@
1-
export const ACTOR_OPTIONS = [
2-
{
3-
label: "Shopee",
4-
value: "scraper.shopee",
5-
},
6-
{
7-
label: "BR Sites - Solucoes cnpjreva",
8-
value: "scraper.solucoes",
9-
},
10-
{
11-
label: "BR Sites - Solucoes certidaointernet",
12-
value: "scraper.solucoes.certidaointernet",
13-
},
14-
{
15-
label: "BR Sites - Servicos receita",
16-
value: "scraper.servicos.receita",
17-
},
18-
{
19-
label: "BR Sites - Consopt",
20-
value: "scraper.consopt",
21-
},
22-
{
23-
label: "Avnet",
24-
value: "scraper.avnet",
25-
},
26-
{
27-
label: "Arrow",
28-
value: "scraper.arrow",
29-
},
30-
{
31-
label: "Airline Iberia",
32-
value: "scraper.iberia",
33-
},
34-
{
35-
label: "Airline Expedia",
36-
value: "scraper.expedia",
37-
},
38-
{
39-
label: "Airline Kayak",
40-
value: "scraper.kayak",
41-
},
42-
{
43-
label: "Amazon Product",
44-
value: "scraper.amazon.product",
45-
},
46-
{
47-
label: "Amazon Seller",
48-
value: "scraper.amazon.seller",
49-
},
50-
{
51-
label: "Amazon Keywords",
52-
value: "scraper.amazon.keywords",
53-
},
54-
{
55-
label: "Temu",
56-
value: "scraper.temu.mobile.detail",
57-
},
58-
{
59-
label: "Google Search",
60-
value: "scraper.google.search",
61-
},
62-
{
63-
label: "Google Trends",
64-
value: "scraper.google.trends",
65-
},
66-
{
67-
label: "Google FLights",
68-
value: "scraper.google.flights",
69-
},
70-
{
71-
label: "Google FLights Chart",
72-
value: "scraper.google.flights.chart",
73-
},
74-
{
75-
label: "Google Maps",
76-
value: "scraper.google.maps",
77-
},
78-
{
79-
label: "Google Scholar",
80-
value: "scraper.google.scholar",
81-
},
82-
{
83-
label: "Google Jobs",
84-
value: "scraper.google.jobs",
85-
},
86-
{
87-
label: "Google Shopping",
88-
value: "scraper.google.shopping",
89-
},
90-
{
91-
label: "Google Hotels",
92-
value: "scraper.google.hotels",
93-
},
94-
{
95-
label: "Google News",
96-
value: "scraper.google.news",
97-
},
98-
{
99-
label: "Google Lens",
100-
value: "scraper.google.lens",
101-
},
102-
{
103-
label: "Google Finance",
104-
value: "scraper.google.finance",
105-
},
106-
{
107-
label: "Google Product",
108-
value: "scraper.google.product",
109-
},
110-
{
111-
label: "Google Play Games",
112-
value: "scraper.google.play.games",
113-
},
114-
{
115-
label: "Google Play Books",
116-
value: "scraper.google.play.books",
117-
},
118-
{
119-
label: "Google Play Movies",
120-
value: "scraper.google.play.movies",
121-
},
122-
{
123-
label: "Google Play Product",
124-
value: "scraper.google.play.product",
125-
},
126-
{
127-
label: "Google Play Apps",
128-
value: "scraper.google.play",
129-
},
130-
{
131-
label: "Google Ads",
132-
value: "scraper.google.ads",
133-
},
134-
{
135-
label: "Mouser",
136-
value: "scraper.mouser",
137-
},
138-
];
139-
export const countryOptions = [
1+
export const COUNTRY_OPTIONS = [
1402
{
1413
regionId: -1,
1424
value: "ANY",

components/scrapeless/scrapeless.app.mjs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,13 +37,13 @@ export default {
3737
submitScrapeJob(opts = {}) {
3838
return this._makeRequest({
3939
method: "POST",
40-
path: "/scraper/request",
40+
path: "/v1/scraper/request",
4141
...opts,
4242
});
4343
},
4444
getScrapeResult({ scrapeJobId }) {
4545
return this._makeRequest({
46-
path: `/scraper/result/${scrapeJobId}`,
46+
path: `/v1/scraper/result/${scrapeJobId}`,
4747
});
4848
},
4949
},

0 commit comments

Comments
 (0)