Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions components/scrapeninja/.gitignore

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,231 @@
import { ConfigurationError } from "@pipedream/platform";
import {
clearObj,
parseError, parseObject,
} from "../../common/utils.mjs";
import scrapeninja from "../../scrapeninja.app.mjs";

export default {
key: "scrapeninja-scrape-with-js-rendering",
name: "Scrape with JS Rendering",
description: "Uses the ScrapeNinja real Chrome browser engine to scrape pages that require JS rendering. [See the documentation](https://scrapeninja.net/docs/api-reference/scrape-js/)",
version: "0.0.1",
type: "action",
// Props exposed by the "Scrape with JS Rendering" action.
// Each entry references the definition of the same name declared on the
// scrapeninja app (imported from ../../scrapeninja.app.mjs). Every prop except
// `url` is additionally marked `optional: true` here.
props: {
// The app itself; run() calls this.scrapeninja.scrapeJs() on it.
scrapeninja,
// Target URL — the only prop not overridden as optional in this action.
url: {
propDefinition: [
scrapeninja,
"url",
],
},
waitForSelector: {
propDefinition: [
scrapeninja,
"waitForSelector",
],
optional: true,
},
postWaitTime: {
propDefinition: [
scrapeninja,
"postWaitTime",
],
optional: true,
},
dumpIframe: {
propDefinition: [
scrapeninja,
"dumpIframe",
],
optional: true,
},
waitForSelectorIframe: {
propDefinition: [
scrapeninja,
"waitForSelectorIframe",
],
optional: true,
},
extractorTargetIframe: {
propDefinition: [
scrapeninja,
"extractorTargetIframe",
],
optional: true,
},
// run() passes this value through parseObject() before sending it.
headers: {
propDefinition: [
scrapeninja,
"headers",
],
optional: true,
},
retryNum: {
propDefinition: [
scrapeninja,
"retryNum",
],
optional: true,
},
geo: {
propDefinition: [
scrapeninja,
"geo",
],
optional: true,
},
proxy: {
propDefinition: [
scrapeninja,
"proxy",
],
optional: true,
},
timeout: {
propDefinition: [
scrapeninja,
"timeout",
],
optional: true,
},
// run() passes this value through parseObject() before sending it.
textNotExpected: {
propDefinition: [
scrapeninja,
"textNotExpected",
],
optional: true,
},
// run() passes this value through parseObject() before sending it.
statusNotExpected: {
propDefinition: [
scrapeninja,
"statusNotExpected",
],
optional: true,
},
blockImages: {
propDefinition: [
scrapeninja,
"blockImages",
],
optional: true,
},
blockMedia: {
propDefinition: [
scrapeninja,
"blockMedia",
],
optional: true,
},
screenshot: {
propDefinition: [
scrapeninja,
"screenshot",
],
optional: true,
},
catchAjaxHeadersUrlMask: {
propDefinition: [
scrapeninja,
"catchAjaxHeadersUrlMask",
],
optional: true,
},
// The six viewport* props below are not sent individually: run() groups them
// into a single `viewport` object (width, height, deviceScaleFactor, hasTouch,
// isMobile, isLandscape) and attaches it to the payload only when non-empty.
viewportWidth: {
propDefinition: [
scrapeninja,
"viewportWidth",
],
optional: true,
},
viewportHeight: {
propDefinition: [
scrapeninja,
"viewportHeight",
],
optional: true,
},
viewportDeviceScaleFactor: {
propDefinition: [
scrapeninja,
"viewportDeviceScaleFactor",
],
optional: true,
},
viewportHasTouch: {
propDefinition: [
scrapeninja,
"viewportHasTouch",
],
optional: true,
},
viewportIsMobile: {
propDefinition: [
scrapeninja,
"viewportIsMobile",
],
optional: true,
},
viewportIsLandscape: {
propDefinition: [
scrapeninja,
"viewportIsLandscape",
],
optional: true,
},
// Sent to the API as-is (no parseObject() call in run()).
extractor: {
propDefinition: [
scrapeninja,
"extractor",
],
optional: true,
},
},
async run({ $ }) {
try {
const viewport = clearObj({
width: this.viewportWidth,
height: this.viewportHeight,
deviceScaleFactor: this.viewportDeviceScaleFactor,
hasTouch: this.viewportHasTouch,
isMobile: this.viewportIsMobile,
isLandscape: this.viewportIsLandscape,
});

const data = clearObj({
url: this.url,
waitForSelector: this.waitForSelector,
postWaitTime: this.postWaitTime,
dumpIframe: this.dumpIframe,
waitForSelectorIframe: this.waitForSelectorIframe,
extractorTargetIframe: this.extractorTargetIframe,
headers: parseObject(this.headers),
retryNum: this.retryNum,
geo: this.geo,
proxy: this.proxy,
timeout: this.timeout,
textNotExpected: parseObject(this.textNotExpected),
statusNotExpected: parseObject(this.statusNotExpected),
blockImages: this.blockImages,
blockMedia: this.blockMedia,
screenshot: this.screenshot,
catchAjaxHeadersUrlMask: this.catchAjaxHeadersUrlMask,
extractor: this.extractor,
});

if (Object.entries(viewport).length) {
data.viewport = viewport;
}

const response = await this.scrapeninja.scrapeJs({
$,
data,
});

$.export("$summary", `Successfully scraped ${this.url} with JS rendering`);
return response;
} catch ({ response: { data } }) {
throw new ConfigurationError(parseError(data));
}
Comment on lines +227 to +229
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Prevent crashing on unexpected error shapes

Similar to the other action, the nested destructuring in the catch block can lead to runtime errors if response or data objects are missing. Use safe access or a fallback to avoid an unhandled exception when err has a different structure.

} catch (err) {
-  throw new ConfigurationError(parseError(err.response.data));
+  const safeData = err?.response?.data;
+  throw new ConfigurationError(parseError(safeData));
}

Committable suggestion skipped: line range outside the PR's diff.

},
};
106 changes: 106 additions & 0 deletions components/scrapeninja/actions/scrape-without-js/scrape-without-js.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import { ConfigurationError } from "@pipedream/platform";
import { parseObject } from "../../common/utils.mjs";
import scrapeninja from "../../scrapeninja.app.mjs";

export default {
key: "scrapeninja-scrape-without-js",
name: "Scrape without JS",
description: "Use high-performance web scraping endpoint with Chrome browser TLS fingerprint, but without JavaScript execution and real browser overhead. [See the documentation](https://scrapeninja.net/docs/api-reference/scrape/)",
version: "0.0.1",
type: "action",
// Props exposed by the "Scrape without JS" action.
// Each entry references the definition of the same name declared on the
// scrapeninja app (imported from ../../scrapeninja.app.mjs). Every prop except
// `url` is additionally marked `optional: true` here.
props: {
// The app itself; run() calls this.scrapeninja.scrapeNonJs() on it.
scrapeninja,
// Target URL — the only prop not overridden as optional in this action.
url: {
propDefinition: [
scrapeninja,
"url",
],
},
// run() passes this value through parseObject() before sending it.
headers: {
propDefinition: [
scrapeninja,
"headers",
],
optional: true,
},
retryNum: {
propDefinition: [
scrapeninja,
"retryNum",
],
optional: true,
},
geo: {
propDefinition: [
scrapeninja,
"geo",
],
optional: true,
},
proxy: {
propDefinition: [
scrapeninja,
"proxy",
],
optional: true,
},
followRedirects: {
propDefinition: [
scrapeninja,
"followRedirects",
],
optional: true,
},
timeout: {
propDefinition: [
scrapeninja,
"timeout",
],
optional: true,
},
// run() passes this value through parseObject() before sending it.
textNotExpected: {
propDefinition: [
scrapeninja,
"textNotExpected",
],
optional: true,
},
// run() passes this value through parseObject() before sending it.
statusNotExpected: {
propDefinition: [
scrapeninja,
"statusNotExpected",
],
optional: true,
},
// Sent to the API as-is (no parseObject() call in run()).
extractor: {
propDefinition: [
scrapeninja,
"extractor",
],
optional: true,
},
},
async run({ $ }) {
try {
const response = await this.scrapeninja.scrapeNonJs({
$,
data: {
url: this.url,
headers: parseObject(this.headers),
retryNum: this.retryNum,
geo: this.geo,
proxy: this.proxy,
followRedirects: this.followRedirects,
timeout: this.timeout,
textNotExpected: parseObject(this.textNotExpected),
statusNotExpected: parseObject(this.statusNotExpected),
extractor: this.extractor,
},
});
$.export("$summary", "Successfully scraped the URL");
return response;
} catch ({ response: { data } }) {
throw new ConfigurationError(data.message || data.stderr);
}
Comment on lines +102 to +104
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Guard against missing data in error response

Destructuring ({ response: { data } }) throws a runtime error if response or data is undefined. Add a fallback or restructure the catch block to avoid uncaught exceptions in cases where the error format differs from the expected shape.

} catch (err) {
-  throw new ConfigurationError(err.response.data.message || err.response.data.stderr);
+  const msg = err?.response?.data?.message || err?.response?.data?.stderr || "Unknown error";
+  throw new ConfigurationError(msg);
}
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
} catch ({ response: { data } }) {
throw new ConfigurationError(data.message || data.stderr);
}
} catch (err) {
const msg = err?.response?.data?.message || err?.response?.data?.stderr || "Unknown error";
throw new ConfigurationError(msg);
}

},
};
13 changes: 0 additions & 13 deletions components/scrapeninja/app/scrapeninja.app.ts

This file was deleted.

Loading
Loading