Skip to content

Commit 3f31cb3

Browse files
committed
[Components] scrapeninja #15137
Actions - Non JS Scraping - Scraping With JS Rendering
1 parent baa56ab commit 3f31cb3

File tree

7 files changed: 223 additions (+223) and 202 deletions (-202).

components/scrapeninja/.gitignore

Lines changed: 0 additions & 3 deletions
This file was deleted.

components/scrapeninja/actions/non-js-scraping/non-js-scraping.mjs

Lines changed: 35 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,15 @@
1+
import { ConfigurationError } from "@pipedream/platform";
2+
import { parseObject } from "../../common/utils.mjs";
13
import scrapeninja from "../../scrapeninja.app.mjs";
2-
import { axios } from "@pipedream/platform";
34

45
export default {
56
key: "scrapeninja-non-js-scraping",
6-
name: "ScrapeNinja Non-JS Scraping",
7-
description: "Use ScrapeNinja's high-performance non-JS scraping endpoint. [See the documentation]()",
8-
version: "0.0.{{ts}}",
7+
name: "Non-JS Scraping",
8+
description: "Use high-performance web scraping endpoint with Chrome browser TLS fingerprint, but without JavaScript execution and real browser overhead. [See the documentation](https://scrapeninja.net/docs/api-reference/scrape/)",
9+
version: "0.0.1",
910
type: "action",
1011
props: {
11-
scrapeninja: {
12-
type: "app",
13-
app: "scrapeninja",
14-
},
12+
scrapeninja,
1513
url: {
1614
propDefinition: [
1715
scrapeninja,
@@ -25,10 +23,10 @@ export default {
2523
],
2624
optional: true,
2725
},
28-
retrynum: {
26+
retryNum: {
2927
propDefinition: [
3028
scrapeninja,
31-
"retrynum",
29+
"retryNum",
3230
],
3331
optional: true,
3432
},
@@ -46,10 +44,10 @@ export default {
4644
],
4745
optional: true,
4846
},
49-
followredirects: {
47+
followRedirects: {
5048
propDefinition: [
5149
scrapeninja,
52-
"followredirects",
50+
"followRedirects",
5351
],
5452
optional: true,
5553
},
@@ -60,17 +58,17 @@ export default {
6058
],
6159
optional: true,
6260
},
63-
textnotexpected: {
61+
textNotExpected: {
6462
propDefinition: [
6563
scrapeninja,
66-
"textnotexpected",
64+
"textNotExpected",
6765
],
6866
optional: true,
6967
},
70-
statusnotexpected: {
68+
statusNotExpected: {
7169
propDefinition: [
7270
scrapeninja,
73-
"statusnotexpected",
71+
"statusNotExpected",
7472
],
7573
optional: true,
7674
},
@@ -83,8 +81,26 @@ export default {
8381
},
8482
},
8583
async run({ $ }) {
86-
const response = await this.scrapeninja.scrapeNonJs();
87-
$.export("$summary", "Successfully scraped the URL");
88-
return response;
84+
try {
85+
const response = await this.scrapeninja.scrapeNonJs({
86+
$,
87+
data: {
88+
url: this.url,
89+
headers: parseObject(this.headers),
90+
retryNum: this.retryNum,
91+
geo: this.geo,
92+
proxy: this.proxy,
93+
followRedirects: this.followRedirects,
94+
timeout: this.timeout,
95+
textNotExpected: parseObject(this.textNotExpected),
96+
statusNotExpected: parseObject(this.statusNotExpected),
97+
extractor: this.extractor,
98+
},
99+
});
100+
$.export("$summary", "Successfully scraped the URL");
101+
return response;
102+
} catch ({ response: { data } }) {
103+
throw new ConfigurationError(data.message || data.stderr);
104+
}
89105
},
90106
};
components/scrapeninja/actions/scraping-with-js-rendering/scraping-with-js-rendering.mjs

Lines changed: 78 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,15 @@
1+
import { ConfigurationError } from "@pipedream/platform";
2+
import {
3+
clearObj,
4+
parseError, parseObject,
5+
} from "../../common/utils.mjs";
16
import scrapeninja from "../../scrapeninja.app.mjs";
2-
import { axios } from "@pipedream/platform";
37

48
export default {
59
key: "scrapeninja-scraping-with-js-rendering",
6-
name: "ScrapeNinja Scraping with JS Rendering",
10+
name: "Scraping with JS Rendering",
711
description: "Uses the ScrapeNinja real Chrome browser engine to scrape pages that require JS rendering. [See the documentation](https://scrapeninja.net/docs/api-reference/scrape-js/)",
8-
version: "0.0.{{ts}}",
12+
version: "0.0.1",
913
type: "action",
1014
props: {
1115
scrapeninja,
@@ -15,38 +19,38 @@ export default {
1519
"url",
1620
],
1721
},
18-
waitforselector: {
22+
waitForSelector: {
1923
propDefinition: [
2024
scrapeninja,
21-
"waitforselector",
25+
"waitForSelector",
2226
],
2327
optional: true,
2428
},
25-
postwaittime: {
29+
postWaitTime: {
2630
propDefinition: [
2731
scrapeninja,
28-
"postwaittime",
32+
"postWaitTime",
2933
],
3034
optional: true,
3135
},
32-
dumpiframe: {
36+
dumpIframe: {
3337
propDefinition: [
3438
scrapeninja,
35-
"dumpiframe",
39+
"dumpIframe",
3640
],
3741
optional: true,
3842
},
39-
waitforselectoriframe: {
43+
waitForSelectorIframe: {
4044
propDefinition: [
4145
scrapeninja,
42-
"waitforselectoriframe",
46+
"waitForSelectorIframe",
4347
],
4448
optional: true,
4549
},
46-
extractortargetiframe: {
50+
extractorTargetIframe: {
4751
propDefinition: [
4852
scrapeninja,
49-
"extractortargetiframe",
53+
"extractorTargetIframe",
5054
],
5155
optional: true,
5256
},
@@ -57,10 +61,10 @@ export default {
5761
],
5862
optional: true,
5963
},
60-
retrynum: {
64+
retryNum: {
6165
propDefinition: [
6266
scrapeninja,
63-
"retrynum",
67+
"retryNum",
6468
],
6569
optional: true,
6670
},
@@ -85,31 +89,31 @@ export default {
8589
],
8690
optional: true,
8791
},
88-
textnotexpected: {
92+
textNotExpected: {
8993
propDefinition: [
9094
scrapeninja,
91-
"textnotexpected",
95+
"textNotExpected",
9296
],
9397
optional: true,
9498
},
95-
statusnotexpected: {
99+
statusNotExpected: {
96100
propDefinition: [
97101
scrapeninja,
98-
"statusnotexpected",
102+
"statusNotExpected",
99103
],
100104
optional: true,
101105
},
102-
blockimages: {
106+
blockImages: {
103107
propDefinition: [
104108
scrapeninja,
105-
"blockimages",
109+
"blockImages",
106110
],
107111
optional: true,
108112
},
109-
blockmedia: {
113+
blockMedia: {
110114
propDefinition: [
111115
scrapeninja,
112-
"blockmedia",
116+
"blockMedia",
113117
],
114118
optional: true,
115119
},
@@ -120,52 +124,52 @@ export default {
120124
],
121125
optional: true,
122126
},
123-
catchajaxheadersurlmask: {
127+
catchAjaxHeadersUrlMask: {
124128
propDefinition: [
125129
scrapeninja,
126-
"catchajaxheadersurlmask",
130+
"catchAjaxHeadersUrlMask",
127131
],
128132
optional: true,
129133
},
130134
viewportWidth: {
131135
propDefinition: [
132136
scrapeninja,
133-
"viewportwitdh",
137+
"viewportWitdh",
134138
],
135139
optional: true,
136140
},
137141
viewportHeight: {
138142
propDefinition: [
139143
scrapeninja,
140-
"viewportheight",
144+
"viewportHeight",
141145
],
142146
optional: true,
143147
},
144148
viewportDeviceScaleFactor: {
145149
propDefinition: [
146150
scrapeninja,
147-
"viewportdevicescalefactor",
151+
"viewportDeviceScaleFactor",
148152
],
149153
optional: true,
150154
},
151155
viewportHasTouch: {
152156
propDefinition: [
153157
scrapeninja,
154-
"viewporthastouch",
158+
"viewportHasTouch",
155159
],
156160
optional: true,
157161
},
158162
viewportIsMobile: {
159163
propDefinition: [
160164
scrapeninja,
161-
"viewportismobile",
165+
"viewportIsMobile",
162166
],
163167
optional: true,
164168
},
165169
viewportIsLandscape: {
166170
propDefinition: [
167171
scrapeninja,
168-
"viewportislandscape",
172+
"viewportIsLandscape",
169173
],
170174
optional: true,
171175
},
@@ -178,38 +182,50 @@ export default {
178182
},
179183
},
180184
async run({ $ }) {
181-
const viewport = {
182-
width: this.viewportWidth,
183-
height: this.viewportHeight,
184-
deviceScaleFactor: this.viewportDeviceScaleFactor,
185-
hasTouch: this.viewportHasTouch,
186-
isMobile: this.viewportIsMobile,
187-
isLandscape: this.viewportIsLandscape,
188-
};
185+
try {
186+
const viewport = clearObj({
187+
width: this.viewportWidth,
188+
height: this.viewportHeight,
189+
deviceScaleFactor: this.viewportDeviceScaleFactor,
190+
hasTouch: this.viewportHasTouch,
191+
isMobile: this.viewportIsMobile,
192+
isLandscape: this.viewportIsLandscape,
193+
});
189194

190-
const response = await this.scrapeninja.scrapeJs({
191-
url: this.url,
192-
waitForSelector: this.waitforselector,
193-
postWaitTime: this.postwaittime,
194-
dumpIframe: this.dumpiframe,
195-
waitForSelectorIframe: this.waitforselectoriframe,
196-
extractorTargetIframe: this.extractortargetiframe,
197-
headers: this.headers,
198-
retryNum: this.retrynum,
199-
geo: this.geo,
200-
proxy: this.proxy,
201-
timeout: this.timeout,
202-
textNotExpected: this.textnotexpected,
203-
statusNotExpected: this.statusnotexpected,
204-
blockImages: this.blockimages,
205-
blockMedia: this.blockmedia,
206-
screenshot: this.screenshot,
207-
catchAjaxHeadersUrlMask: this.catchajaxheadersurlmask,
208-
viewport,
209-
extractor: this.extractor,
210-
});
195+
const data = clearObj({
196+
url: this.url,
197+
waitForSelector: this.waitForSelector,
198+
postWaitTime: this.postWaitTime,
199+
dumpIframe: this.dumpIframe,
200+
waitForSelectorIframe: this.waitForSelectorIframe,
201+
extractorTargetIframe: this.extractorTargetIframe,
202+
headers: parseObject(this.headers),
203+
retryNum: this.retryNum,
204+
geo: this.geo,
205+
proxy: this.proxy,
206+
timeout: this.timeout,
207+
textNotExpected: parseObject(this.textNotExpected),
208+
statusNotExpected: parseObject(this.statusNotExpected),
209+
blockImages: this.blockImages,
210+
blockMedia: this.blockMedia,
211+
screenshot: this.screenshot,
212+
catchAjaxHeadersUrlMask: this.catchAjaxHeadersUrlMask,
213+
extractor: this.extractor,
214+
});
211215

212-
$.export("$summary", `Successfully scraped ${this.url} with JS rendering`);
213-
return response;
216+
if (Object.entries(viewport).length) {
217+
data.viewport = viewport;
218+
}
219+
220+
const response = await this.scrapeninja.scrapeJs({
221+
$,
222+
data,
223+
});
224+
225+
$.export("$summary", `Successfully scraped ${this.url} with JS rendering`);
226+
return response;
227+
} catch ({ response: { data } }) {
228+
throw new ConfigurationError(parseError(data));
229+
}
214230
},
215231
};

components/scrapeninja/app/scrapeninja.app.ts

Lines changed: 0 additions & 13 deletions
This file was deleted.

0 commit comments

Comments
 (0)