Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions components/scrapeninja/.gitignore

This file was deleted.

106 changes: 106 additions & 0 deletions components/scrapeninja/actions/non-js-scraping/non-js-scraping.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import { ConfigurationError } from "@pipedream/platform";
import { parseObject } from "../../common/utils.mjs";
import scrapeninja from "../../scrapeninja.app.mjs";

export default {
key: "scrapeninja-non-js-scraping",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@luancazarine can you rename the actual action names / slugs? Since they are new components I think it'd be best for them to match the name before being shipped

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The filenames are still the same - maybe the commit wasn't pushed?
/actions/non-js-scraping/non-js-scraping.mjs
should match the action name, as in
/actions/scrape-without-js/scrape-without-js.mjs
The same goes for the component key, for both components

name: "Non-JS Scraping",
description: "Use high-performance web scraping endpoint with Chrome browser TLS fingerprint, but without JavaScript execution and real browser overhead. [See the documentation](https://scrapeninja.net/docs/api-reference/scrape/)",
version: "0.0.1",
type: "action",
// User-configurable inputs for this action. Each entry below reuses a prop
// definition declared on the ScrapeNinja app (referenced by its string key)
// instead of redefining type/label/description locally.
props: {
// The app itself, which exposes the scrapeNonJs() call used in run().
scrapeninja,
// Target URL; the only prop here that does not add a local `optional: true` override.
url: {
propDefinition: [
scrapeninja,
"url",
],
},
// Passed through parseObject() in run() before being sent.
headers: {
propDefinition: [
scrapeninja,
"headers",
],
optional: true,
},
retryNum: {
propDefinition: [
scrapeninja,
"retryNum",
],
optional: true,
},
geo: {
propDefinition: [
scrapeninja,
"geo",
],
optional: true,
},
proxy: {
propDefinition: [
scrapeninja,
"proxy",
],
optional: true,
},
followRedirects: {
propDefinition: [
scrapeninja,
"followRedirects",
],
optional: true,
},
timeout: {
propDefinition: [
scrapeninja,
"timeout",
],
optional: true,
},
// Passed through parseObject() in run() before being sent.
textNotExpected: {
propDefinition: [
scrapeninja,
"textNotExpected",
],
optional: true,
},
// Passed through parseObject() in run() before being sent.
statusNotExpected: {
propDefinition: [
scrapeninja,
"statusNotExpected",
],
optional: true,
},
// Forwarded to the API as-is (no parseObject() call in run()).
extractor: {
propDefinition: [
scrapeninja,
"extractor",
],
optional: true,
},
},
async run({ $ }) {
try {
const response = await this.scrapeninja.scrapeNonJs({
$,
data: {
url: this.url,
headers: parseObject(this.headers),
retryNum: this.retryNum,
geo: this.geo,
proxy: this.proxy,
followRedirects: this.followRedirects,
timeout: this.timeout,
textNotExpected: parseObject(this.textNotExpected),
statusNotExpected: parseObject(this.statusNotExpected),
extractor: this.extractor,
},
});
$.export("$summary", "Successfully scraped the URL");
return response;
} catch ({ response: { data } }) {
throw new ConfigurationError(data.message || data.stderr);
}
},
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,231 @@
import { ConfigurationError } from "@pipedream/platform";
import {
clearObj,
parseError, parseObject,
} from "../../common/utils.mjs";
import scrapeninja from "../../scrapeninja.app.mjs";

export default {
key: "scrapeninja-scraping-with-js-rendering",
name: "Scraping with JS Rendering",
description: "Uses the ScrapeNinja real Chrome browser engine to scrape pages that require JS rendering. [See the documentation](https://scrapeninja.net/docs/api-reference/scrape-js/)",
version: "0.0.1",
type: "action",
// User-configurable inputs for this action. Each entry below reuses a prop
// definition declared on the ScrapeNinja app (referenced by its string key)
// instead of redefining type/label/description locally.
props: {
// The app itself, which exposes the scrapeJs() call used in run().
scrapeninja,
// Target URL; the only prop here that does not add a local `optional: true` override.
url: {
propDefinition: [
scrapeninja,
"url",
],
},
waitForSelector: {
propDefinition: [
scrapeninja,
"waitForSelector",
],
optional: true,
},
postWaitTime: {
propDefinition: [
scrapeninja,
"postWaitTime",
],
optional: true,
},
dumpIframe: {
propDefinition: [
scrapeninja,
"dumpIframe",
],
optional: true,
},
waitForSelectorIframe: {
propDefinition: [
scrapeninja,
"waitForSelectorIframe",
],
optional: true,
},
extractorTargetIframe: {
propDefinition: [
scrapeninja,
"extractorTargetIframe",
],
optional: true,
},
// Passed through parseObject() in run() before being sent.
headers: {
propDefinition: [
scrapeninja,
"headers",
],
optional: true,
},
retryNum: {
propDefinition: [
scrapeninja,
"retryNum",
],
optional: true,
},
geo: {
propDefinition: [
scrapeninja,
"geo",
],
optional: true,
},
proxy: {
propDefinition: [
scrapeninja,
"proxy",
],
optional: true,
},
timeout: {
propDefinition: [
scrapeninja,
"timeout",
],
optional: true,
},
// Passed through parseObject() in run() before being sent.
textNotExpected: {
propDefinition: [
scrapeninja,
"textNotExpected",
],
optional: true,
},
// Passed through parseObject() in run() before being sent.
statusNotExpected: {
propDefinition: [
scrapeninja,
"statusNotExpected",
],
optional: true,
},
blockImages: {
propDefinition: [
scrapeninja,
"blockImages",
],
optional: true,
},
blockMedia: {
propDefinition: [
scrapeninja,
"blockMedia",
],
optional: true,
},
screenshot: {
propDefinition: [
scrapeninja,
"screenshot",
],
optional: true,
},
catchAjaxHeadersUrlMask: {
propDefinition: [
scrapeninja,
"catchAjaxHeadersUrlMask",
],
optional: true,
},
// The six viewport* props below are not sent individually: run() groups
// them into a single `viewport` object on the request payload.
viewportWidth: {
propDefinition: [
scrapeninja,
// NOTE(review): "viewportWitdh" looks like a misspelling of "viewportWidth".
// This string must match the prop key declared in scrapeninja.app.mjs, which
// is not visible here — confirm the app's spelling before renaming either side.
"viewportWitdh",
],
optional: true,
},
viewportHeight: {
propDefinition: [
scrapeninja,
"viewportHeight",
],
optional: true,
},
viewportDeviceScaleFactor: {
propDefinition: [
scrapeninja,
"viewportDeviceScaleFactor",
],
optional: true,
},
viewportHasTouch: {
propDefinition: [
scrapeninja,
"viewportHasTouch",
],
optional: true,
},
viewportIsMobile: {
propDefinition: [
scrapeninja,
"viewportIsMobile",
],
optional: true,
},
viewportIsLandscape: {
propDefinition: [
scrapeninja,
"viewportIsLandscape",
],
optional: true,
},
// Forwarded to the API as-is (no parseObject() call in run()).
extractor: {
propDefinition: [
scrapeninja,
"extractor",
],
optional: true,
},
},
async run({ $ }) {
try {
const viewport = clearObj({
width: this.viewportWidth,
height: this.viewportHeight,
deviceScaleFactor: this.viewportDeviceScaleFactor,
hasTouch: this.viewportHasTouch,
isMobile: this.viewportIsMobile,
isLandscape: this.viewportIsLandscape,
});

const data = clearObj({
url: this.url,
waitForSelector: this.waitForSelector,
postWaitTime: this.postWaitTime,
dumpIframe: this.dumpIframe,
waitForSelectorIframe: this.waitForSelectorIframe,
extractorTargetIframe: this.extractorTargetIframe,
headers: parseObject(this.headers),
retryNum: this.retryNum,
geo: this.geo,
proxy: this.proxy,
timeout: this.timeout,
textNotExpected: parseObject(this.textNotExpected),
statusNotExpected: parseObject(this.statusNotExpected),
blockImages: this.blockImages,
blockMedia: this.blockMedia,
screenshot: this.screenshot,
catchAjaxHeadersUrlMask: this.catchAjaxHeadersUrlMask,
extractor: this.extractor,
});

if (Object.entries(viewport).length) {
data.viewport = viewport;
}

const response = await this.scrapeninja.scrapeJs({
$,
data,
});

$.export("$summary", `Successfully scraped ${this.url} with JS rendering`);
return response;
} catch ({ response: { data } }) {
throw new ConfigurationError(parseError(data));
}
},
};
13 changes: 0 additions & 13 deletions components/scrapeninja/app/scrapeninja.app.ts

This file was deleted.

Loading
Loading