New Components - scrapeninja #15753
Changes from all commits: baa56ab, 3f31cb3, eac6a0c, cdc2434, 35beaba, 113f341, 49a3b46
New file (the Scrape with JS Rendering action), `@@ -0,0 +1,231 @@`:

```javascript
import { ConfigurationError } from "@pipedream/platform";
import {
  clearObj,
  parseError, parseObject,
} from "../../common/utils.mjs";
import scrapeninja from "../../scrapeninja.app.mjs";

export default {
  key: "scrapeninja-scrape-with-js-rendering",
  name: "Scrape with JS Rendering",
  description: "Uses the ScrapeNinja real Chrome browser engine to scrape pages that require JS rendering. [See the documentation](https://scrapeninja.net/docs/api-reference/scrape-js/)",
  version: "0.0.1",
  type: "action",
  props: {
    scrapeninja,
    url: {
      propDefinition: [
        scrapeninja,
        "url",
      ],
    },
    waitForSelector: {
      propDefinition: [
        scrapeninja,
        "waitForSelector",
      ],
      optional: true,
    },
    postWaitTime: {
      propDefinition: [
        scrapeninja,
        "postWaitTime",
      ],
      optional: true,
    },
    dumpIframe: {
      propDefinition: [
        scrapeninja,
        "dumpIframe",
      ],
      optional: true,
    },
    waitForSelectorIframe: {
      propDefinition: [
        scrapeninja,
        "waitForSelectorIframe",
      ],
      optional: true,
    },
    extractorTargetIframe: {
      propDefinition: [
        scrapeninja,
        "extractorTargetIframe",
      ],
      optional: true,
    },
    headers: {
      propDefinition: [
        scrapeninja,
        "headers",
      ],
      optional: true,
    },
    retryNum: {
      propDefinition: [
        scrapeninja,
        "retryNum",
      ],
      optional: true,
    },
    geo: {
      propDefinition: [
        scrapeninja,
        "geo",
      ],
      optional: true,
    },
    proxy: {
      propDefinition: [
        scrapeninja,
        "proxy",
      ],
      optional: true,
    },
    timeout: {
      propDefinition: [
        scrapeninja,
        "timeout",
      ],
      optional: true,
    },
    textNotExpected: {
      propDefinition: [
        scrapeninja,
        "textNotExpected",
      ],
      optional: true,
    },
    statusNotExpected: {
      propDefinition: [
        scrapeninja,
        "statusNotExpected",
      ],
      optional: true,
    },
    blockImages: {
      propDefinition: [
        scrapeninja,
        "blockImages",
      ],
      optional: true,
    },
    blockMedia: {
      propDefinition: [
        scrapeninja,
        "blockMedia",
      ],
      optional: true,
    },
    screenshot: {
      propDefinition: [
        scrapeninja,
        "screenshot",
      ],
      optional: true,
    },
    catchAjaxHeadersUrlMask: {
      propDefinition: [
        scrapeninja,
        "catchAjaxHeadersUrlMask",
      ],
      optional: true,
    },
    viewportWidth: {
      propDefinition: [
        scrapeninja,
        "viewportWidth",
      ],
      optional: true,
    },
    viewportHeight: {
      propDefinition: [
        scrapeninja,
        "viewportHeight",
      ],
      optional: true,
    },
    viewportDeviceScaleFactor: {
      propDefinition: [
        scrapeninja,
        "viewportDeviceScaleFactor",
      ],
      optional: true,
    },
    viewportHasTouch: {
      propDefinition: [
        scrapeninja,
        "viewportHasTouch",
      ],
      optional: true,
    },
    viewportIsMobile: {
      propDefinition: [
        scrapeninja,
        "viewportIsMobile",
      ],
      optional: true,
    },
    viewportIsLandscape: {
      propDefinition: [
        scrapeninja,
        "viewportIsLandscape",
      ],
      optional: true,
    },
    extractor: {
      propDefinition: [
        scrapeninja,
        "extractor",
      ],
      optional: true,
    },
  },
  async run({ $ }) {
    try {
      const viewport = clearObj({
        width: this.viewportWidth,
        height: this.viewportHeight,
        deviceScaleFactor: this.viewportDeviceScaleFactor,
        hasTouch: this.viewportHasTouch,
        isMobile: this.viewportIsMobile,
        isLandscape: this.viewportIsLandscape,
      });

      const data = clearObj({
        url: this.url,
        waitForSelector: this.waitForSelector,
        postWaitTime: this.postWaitTime,
        dumpIframe: this.dumpIframe,
        waitForSelectorIframe: this.waitForSelectorIframe,
        extractorTargetIframe: this.extractorTargetIframe,
        headers: parseObject(this.headers),
        retryNum: this.retryNum,
        geo: this.geo,
        proxy: this.proxy,
        timeout: this.timeout,
        textNotExpected: parseObject(this.textNotExpected),
        statusNotExpected: parseObject(this.statusNotExpected),
        blockImages: this.blockImages,
        blockMedia: this.blockMedia,
        screenshot: this.screenshot,
        catchAjaxHeadersUrlMask: this.catchAjaxHeadersUrlMask,
        extractor: this.extractor,
      });

      if (Object.entries(viewport).length) {
        data.viewport = viewport;
      }

      const response = await this.scrapeninja.scrapeJs({
        $,
        data,
      });

      $.export("$summary", `Successfully scraped ${this.url} with JS rendering`);
      return response;
    } catch ({ response: { data } }) {
      throw new ConfigurationError(parseError(data));
    }
  },
};
```
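The helpers imported from `../../common/utils.mjs` (`clearObj`, `parseObject`, `parseError`) are not part of this diff view. The sketch below is a guess at what they do, inferred only from how the actions call them; the function bodies are assumptions, not the file from this PR:

```javascript
// Hypothetical sketch of common/utils.mjs, inferred from usage in these actions.

// Drop keys whose values are undefined, null, or empty strings, so the API
// only receives the props the user actually configured.
export const clearObj = (obj = {}) =>
  Object.fromEntries(
    Object.entries(obj).filter(([, value]) =>
      value !== undefined && value !== null && value !== ""),
  );

// Pipedream props arrive as strings or arrays of strings; parse JSON where
// possible and otherwise pass the value through unchanged.
export const parseObject = (value) => {
  if (!value) return undefined;
  if (Array.isArray(value)) {
    return value.map((item) => {
      try {
        return JSON.parse(item);
      } catch {
        return item;
      }
    });
  }
  if (typeof value === "string") {
    try {
      return JSON.parse(value);
    } catch {
      return value;
    }
  }
  return value;
};

// Reduce an API error payload to a readable message for ConfigurationError.
export const parseError = (data) =>
  data?.message || data?.stderr || JSON.stringify(data);
```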
New file (the Scrape without JS action), `@@ -0,0 +1,106 @@`:
```javascript
import { ConfigurationError } from "@pipedream/platform";
import { parseObject } from "../../common/utils.mjs";
import scrapeninja from "../../scrapeninja.app.mjs";

export default {
  key: "scrapeninja-scrape-without-js",
  name: "Scrape without JS",
  description: "Use high-performance web scraping endpoint with Chrome browser TLS fingerprint, but without JavaScript execution and real browser overhead. [See the documentation](https://scrapeninja.net/docs/api-reference/scrape/)",
  version: "0.0.1",
  type: "action",
  props: {
    scrapeninja,
    url: {
      propDefinition: [
        scrapeninja,
        "url",
      ],
    },
    headers: {
      propDefinition: [
        scrapeninja,
        "headers",
      ],
      optional: true,
    },
    retryNum: {
      propDefinition: [
        scrapeninja,
        "retryNum",
      ],
      optional: true,
    },
    geo: {
      propDefinition: [
        scrapeninja,
        "geo",
      ],
      optional: true,
    },
    proxy: {
      propDefinition: [
        scrapeninja,
        "proxy",
      ],
      optional: true,
    },
    followRedirects: {
      propDefinition: [
        scrapeninja,
        "followRedirects",
      ],
      optional: true,
    },
    timeout: {
      propDefinition: [
        scrapeninja,
        "timeout",
      ],
      optional: true,
    },
    textNotExpected: {
      propDefinition: [
        scrapeninja,
        "textNotExpected",
      ],
      optional: true,
    },
    statusNotExpected: {
      propDefinition: [
        scrapeninja,
        "statusNotExpected",
      ],
      optional: true,
    },
    extractor: {
      propDefinition: [
        scrapeninja,
        "extractor",
      ],
      optional: true,
    },
  },
  async run({ $ }) {
    try {
      const response = await this.scrapeninja.scrapeNonJs({
        $,
        data: {
          url: this.url,
          headers: parseObject(this.headers),
          retryNum: this.retryNum,
          geo: this.geo,
          proxy: this.proxy,
          followRedirects: this.followRedirects,
          timeout: this.timeout,
          textNotExpected: parseObject(this.textNotExpected),
          statusNotExpected: parseObject(this.statusNotExpected),
          extractor: this.extractor,
        },
      });
      $.export("$summary", "Successfully scraped the URL");
      return response;
    } catch ({ response: { data } }) {
      throw new ConfigurationError(data.message || data.stderr);
    }
  },
};
```

Review comment on lines +102 to +104 (the `catch` block):

**Guard against missing `response` / `data`.** Destructuring `{ response: { data } }` in the `catch` throws its own error when the failure has no `response` or `data` (for example, a network error before the API responds), masking the original error. Use safe access with a fallback.

Suggested change:

```diff
 } catch (err) {
-  throw new ConfigurationError(err.response.data.message || err.response.data.stderr);
+  const msg = err?.response?.data?.message || err?.response?.data?.stderr || "Unknown error";
+  throw new ConfigurationError(msg);
 }
```
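Both actions delegate the HTTP call to `scrapeninja.app.mjs`, which is not shown in this diff view. A rough sketch of how `scrapeJs` and `scrapeNonJs` might wrap the ScrapeNinja API follows; the base URL, endpoint paths, and auth header are assumptions drawn from the ScrapeNinja RapidAPI listing, not confirmed by this PR, and the real app file also defines the `propDefinition`s referenced above:

```javascript
import { axios } from "@pipedream/platform";

// Hypothetical sketch of the request methods in scrapeninja.app.mjs.
export default {
  type: "app",
  app: "scrapeninja",
  methods: {
    _baseUrl() {
      // Assumed RapidAPI host for ScrapeNinja.
      return "https://scrapeninja.p.rapidapi.com";
    },
    _makeRequest({ $ = this, path, ...opts }) {
      return axios($, {
        method: "POST",
        url: `${this._baseUrl()}${path}`,
        headers: {
          // Assumed auth scheme; the real header names depend on the app definition.
          "X-RapidAPI-Key": `${this.$auth.rapidapi_key}`,
        },
        ...opts,
      });
    },
    // Non-JS endpoint: fast scraping with a Chrome TLS fingerprint.
    scrapeNonJs(opts = {}) {
      return this._makeRequest({
        path: "/scrape",
        ...opts,
      });
    },
    // JS endpoint: renders the page in a real browser before returning it.
    scrapeJs(opts = {}) {
      return this._makeRequest({
        path: "/scrape-js",
        ...opts,
      });
    },
  },
};
```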
**Prevent crashing on unexpected error shapes**

Similar to the other action, the nested destructuring in the `catch` block can lead to runtime errors if the `response` or `data` objects are missing. Use safe access or a fallback to avoid an unhandled exception when `err` has a different structure:

```diff
 } catch (err) {
-  throw new ConfigurationError(parseError(err.response.data));
+  const safeData = err?.response?.data;
+  throw new ConfigurationError(parseError(safeData));
 }
```
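One way to apply both review comments without repeating the guard in each action would be a small shared helper, for example in `common/utils.mjs`. This is a hypothetical sketch, not part of this PR:

```javascript
import { ConfigurationError } from "@pipedream/platform";

// Hypothetical helper: turn any thrown error into a ConfigurationError with a
// readable message, without assuming err.response.data exists.
export function throwConfigurationError(err) {
  const data = err?.response?.data;
  const message = data?.message || data?.stderr || err?.message || "Unknown error";
  throw new ConfigurationError(message);
}
```

Each action's `run` could then end with `catch (err) { throwConfigurationError(err); }`, keeping the original error visible even when the request never reached the API.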