Skip to content

Commit 33a14e3

Browse files
lcaresia and GTFalcao authored
[Components] scrapingant #13316 (#15379)
* Added actions
* Done requests changes
* Update general-extraction.mjs
* Update components/scrapingant/scrapingant.app.mjs Co-authored-by: Guilherme Falcão <[email protected]>
* Update components/scrapingant/scrapingant.app.mjs Co-authored-by: Guilherme Falcão <[email protected]>
* Update components/scrapingant/scrapingant.app.mjs Co-authored-by: Guilherme Falcão <[email protected]>
* Update components/scrapingant/scrapingant.app.mjs Co-authored-by: Guilherme Falcão <[email protected]>
* Added actions
* Done requests changes
---------
Co-authored-by: Guilherme Falcão <[email protected]>
1 parent e7dd5ab commit 33a14e3

File tree

5 files changed

+326
-7
lines changed

5 files changed

+326
-7
lines changed
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
import app from "../../scrapingant.app.mjs";
2+
3+
/**
 * Pipedream action that sends a single request to ScrapingAnt's general
 * extraction endpoint through the shared `app` module.
 *
 * The `returnPageSource`, `jsSnippet` and `blockResource` props start out
 * hidden and disabled; `additionalProps` toggles them based on `browser`.
 */
export default {
  key: "scrapingant-general-extraction",
  name: "General Extraction",
  description: "Send a request using the standard extraction method of ScrapingAnt. [See the documentation](https://docs.scrapingant.com/request-response-format)",
  version: "0.0.1",
  type: "action",
  props: {
    app,
    url: {
      propDefinition: [
        app,
        "url",
      ],
    },
    browser: {
      propDefinition: [
        app,
        "browser",
      ],
      // Changing this prop re-runs `additionalProps` below.
      reloadProps: true,
    },
    returnPageSource: {
      propDefinition: [
        app,
        "returnPageSource",
      ],
      disabled: true,
      hidden: true,
    },
    cookies: {
      propDefinition: [
        app,
        "cookies",
      ],
    },
    jsSnippet: {
      propDefinition: [
        app,
        "jsSnippet",
      ],
      disabled: true,
      hidden: true,
    },
    proxyType: {
      propDefinition: [
        app,
        "proxyType",
      ],
    },
    proxyCountry: {
      propDefinition: [
        app,
        "proxyCountry",
      ],
    },
    waitForSelector: {
      propDefinition: [
        app,
        "waitForSelector",
      ],
    },
    blockResource: {
      propDefinition: [
        app,
        "blockResource",
      ],
      disabled: true,
      hidden: true,
    },
  },
  /**
   * Shows the browser-only props while `browser` is enabled and hides them
   * again when it is not.
   *
   * The flags are written in both directions so that switching `browser`
   * back off re-hides the props instead of leaving them exposed.
   *
   * @param {object} existingProps - The prop definitions declared in `props`;
   *   mutated in place.
   * @returns {Promise<object>} Always an empty object: no extra props are added.
   */
  async additionalProps(existingProps) {
    const browserOnlyProps = [
      "returnPageSource",
      "jsSnippet",
      "blockResource",
    ];
    const isBrowserOff = !this.browser;

    for (const propName of browserOnlyProps) {
      existingProps[propName].hidden = isBrowserOff;
      existingProps[propName].disabled = isBrowserOff;
    }

    return {};
  },

  /**
   * Sends the configured props to `app.generalExtraction` as query params
   * and returns the response unchanged.
   *
   * @param {object} context
   * @param {object} context.$ - Step context, forwarded to the request and
   *   used to export the summary.
   * @returns {Promise<*>} Whatever `app.generalExtraction` resolves to.
   */
  async run({ $ }) {
    const response = await this.app.generalExtraction({
      $,
      params: {
        url: this.url,
        browser: this.browser,
        return_page_source: this.returnPageSource,
        cookies: this.cookies,
        js_snippet: this.jsSnippet,
        proxy_type: this.proxyType,
        proxy_country: this.proxyCountry,
        wait_for_selector: this.waitForSelector,
        // NOTE(review): `blockResource` is an array prop; confirm the HTTP
        // client serializes array params in the form ScrapingAnt expects.
        block_resource: this.blockResource,
      },
    });
    $.export("$summary", "Successfully sent the request to ScrapingAnt");
    return response;
  },
};
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
/**
 * Turns a `[label, value]` pair into a `{ label, value }` option object.
 */
const toOption = ([
  label,
  value,
]) => ({
  label,
  value,
});

/**
 * Static option lists used by the ScrapingAnt prop definitions.
 */
export default {
  // Proxy country choices; "World" maps to an empty value.
  PROXY_COUNTRIES: [
    ["World", ""],
    ["Brazil", "BR"],
    ["Canada", "CA"],
    ["China", "CN"],
    ["Czech Republic", "CZ"],
    ["France", "FR"],
    ["Germany", "DE"],
    ["Hong Kong", "HK"],
    ["India", "IN"],
    ["Indonesia", "ID"],
    ["Italy", "IT"],
    ["Israel", "IL"],
    ["Japan", "JP"],
    ["Netherlands", "NL"],
    ["Poland", "PL"],
    ["Russia", "RU"],
    ["Saudi Arabia", "SA"],
    ["Singapore", "SG"],
    ["South Korea", "KR"],
    ["Spain", "ES"],
    ["United Kingdom", "GB"],
    ["United Arab Emirates", "AE"],
    ["USA", "US"],
    ["Vietnam", "VN"],
  ].map(toOption),
  // Proxy type choices.
  PROXY_TYPES: [
    ["Residential", "residential"],
    ["Datacenter", "datacenter"],
  ].map(toOption),
  // Resource type names offered for the "Block Resource" prop.
  RESOURCE_TYPES: [
    "document",
    "stylesheet",
    "image",
    "media",
    "font",
    "script",
    "texttrack",
    "xhr",
    "fetch",
    "eventsource",
    "websocket",
    "manifest",
    "other",
  ],
};

components/scrapingant/package.json

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@pipedream/scrapingant",
3-
"version": "0.0.1",
3+
"version": "0.1.0",
44
"description": "Pipedream ScrapingAnt Components",
55
"main": "scrapingant.app.mjs",
66
"keywords": [
@@ -11,5 +11,8 @@
1111
"author": "Pipedream <[email protected]> (https://pipedream.com/)",
1212
"publishConfig": {
1313
"access": "public"
14+
},
15+
"dependencies": {
16+
"@pipedream/platform": "^3.0.3"
1417
}
15-
}
18+
}
Lines changed: 85 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,92 @@
1+
import { axios } from "@pipedream/platform";
2+
import constants from "./common/constants.mjs";
3+
14
/**
 * ScrapingAnt app definition: shared prop definitions plus the HTTP helpers
 * used by the actions.
 */
export default {
  type: "app",
  app: "scrapingant",
  propDefinitions: {
    url: {
      type: "string",
      label: "URL",
      description: "The URL to scrape",
    },
    browser: {
      type: "boolean",
      label: "Browser",
      description: "Enables using a headless browser for scraping",
      optional: true,
    },
    returnPageSource: {
      type: "boolean",
      label: "Return Page Source",
      description: "Enables returning data returned by the server and unaltered by the browser. When true JS won't be rendered",
      optional: true,
    },
    cookies: {
      type: "string",
      label: "Cookies",
      description: "Cookies to pass with a scraping request to the target site, i.e.: `cookie_name1=cookie_value1;cookie_name2=cookie_value2`",
      optional: true,
    },
    jsSnippet: {
      type: "string",
      label: "JS Snippet",
      description: "Base64 encoded JS snippet to run once page being loaded in the ScrapingAnt browser",
      optional: true,
    },
    proxyType: {
      type: "string",
      label: "Proxy Type",
      description: "Specifies the proxy type to make the request from",
      options: constants.PROXY_TYPES,
      optional: true,
    },
    proxyCountry: {
      type: "string",
      label: "Proxy Country",
      description: "Specifies the proxy country to make the request from",
      options: constants.PROXY_COUNTRIES,
      optional: true,
    },
    waitForSelector: {
      type: "string",
      label: "Wait for Selector",
      description: "The CSS selector of the element Scrapingant will wait for before returning the result",
      optional: true,
    },
    // NOTE(review): array-valued; confirm how the HTTP client serializes it
    // when it is sent as a query param.
    blockResource: {
      type: "string[]",
      label: "Block Resource",
      description: "Prevents cloud browser from loading specified resource types",
      options: constants.RESOURCE_TYPES,
      optional: true,
    },
  },
  methods: {
    /**
     * @returns {string} Root URL that every request path is appended to.
     */
    _baseUrl() {
      return "https://api.scrapingant.com/v2";
    },
    /**
     * Sends a request through `axios`, adding the `x-api-key` header taken
     * from `this.$auth.api_token`.
     *
     * @param {object} [opts] - Request options.
     * @param {object} [opts.$] - Context passed to `axios`; defaults to `this`.
     * @param {string} opts.path - Path appended to the base URL.
     * @param {object} [opts.headers] - Extra headers; `x-api-key` is written
     *   last, so it overrides a caller-supplied value.
     * @returns {Promise<*>} Whatever `axios` resolves to.
     */
    async _makeRequest({
      $ = this, path, headers, ...config
    } = {}) {
      const requestConfig = {
        ...config,
        url: `${this._baseUrl()}${path}`,
        headers: {
          ...headers,
          "x-api-key": `${this.$auth.api_token}`,
        },
      };
      return axios($, requestConfig);
    },
    /**
     * Calls the `/general` endpoint.
     *
     * @param {object} [args] - Options forwarded to `_makeRequest`; a `path`
     *   given here replaces the default `/general`.
     * @returns {Promise<*>} Whatever `_makeRequest` resolves to.
     */
    async generalExtraction(args = {}) {
      const request = {
        path: "/general",
        ...args,
      };
      return this._makeRequest(request);
    },
  },
};

pnpm-lock.yaml

Lines changed: 5 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)