-
Notifications
You must be signed in to change notification settings - Fork 5.5k
Spider new components #14288
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Spider new components #14288
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,46 @@ | ||||||||||||||||||||||||||||
| import spider from "../../spider.app.mjs"; | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| export default { | ||||||||||||||||||||||||||||
| key: "spider-scrape-new-page", | ||||||||||||||||||||||||||||
| name: "Scrape New Page", | ||||||||||||||||||||||||||||
| description: "Initiates a new page scrape (crawl). [See the documentation](https://spider.cloud/docs/api#crawl-website)", | ||||||||||||||||||||||||||||
| version: "0.0.1", | ||||||||||||||||||||||||||||
| type: "action", | ||||||||||||||||||||||||||||
| props: { | ||||||||||||||||||||||||||||
| spider, | ||||||||||||||||||||||||||||
| infoBox: { | ||||||||||||||||||||||||||||
| type: "alert", | ||||||||||||||||||||||||||||
| alertType: "info", | ||||||||||||||||||||||||||||
| content: "See [the Spider documentation](https://spider.cloud/docs/api#crawl-website) for information on limits and best practices.", | ||||||||||||||||||||||||||||
| }, | ||||||||||||||||||||||||||||
| url: { | ||||||||||||||||||||||||||||
| type: "string", | ||||||||||||||||||||||||||||
| label: "URL", | ||||||||||||||||||||||||||||
| description: "The URI resource to crawl, e.g. `https://spider.cloud`. This can be a comma split list for multiple urls.", | ||||||||||||||||||||||||||||
| }, | ||||||||||||||||||||||||||||
| limit: { | ||||||||||||||||||||||||||||
| type: "integer", | ||||||||||||||||||||||||||||
| label: "Limit", | ||||||||||||||||||||||||||||
| description: "The maximum amount of pages allowed to crawl per website. Default is 0, which crawls all pages.", | ||||||||||||||||||||||||||||
| optional: true, | ||||||||||||||||||||||||||||
| }, | ||||||||||||||||||||||||||||
| storeData: { | ||||||||||||||||||||||||||||
| type: "boolean", | ||||||||||||||||||||||||||||
| label: "Store Data", | ||||||||||||||||||||||||||||
| description: "Decide whether to store data. Default is `false`.", | ||||||||||||||||||||||||||||
| optional: true, | ||||||||||||||||||||||||||||
| }, | ||||||||||||||||||||||||||||
|
Comment on lines
+27
to
+32
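There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add a default value to the `storeData` prop to ensure it defaults to `false`.
The `storeData` prop is optional, but according to the description, it defaults to `false`. Without specifying a default value in the prop definition, `this.storeData` may be `undefined` when the action runs.
Apply this diff to set the default value for `storeData`:
```diff
 storeData: {
   type: "boolean",
   label: "Store Data",
   description: "Decide whether to store data. Default is `false`.",
   optional: true,
+  default: false,
 },
```
📝 Committable suggestion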
Suggested change
|
||||||||||||||||||||||||||||
| }, | ||||||||||||||||||||||||||||
| async run({ $ }) { | ||||||||||||||||||||||||||||
| const content = await this.spider.initiateCrawl({ | ||||||||||||||||||||||||||||
| $, | ||||||||||||||||||||||||||||
| data: { | ||||||||||||||||||||||||||||
| url: this.url, | ||||||||||||||||||||||||||||
| limit: this.limit, | ||||||||||||||||||||||||||||
| store_data: this.storeData, | ||||||||||||||||||||||||||||
| }, | ||||||||||||||||||||||||||||
| }); | ||||||||||||||||||||||||||||
| $.export("$summary", `Successfully scraped URL ${this.url}`); | ||||||||||||||||||||||||||||
| return content; | ||||||||||||||||||||||||||||
| }, | ||||||||||||||||||||||||||||
| }; | ||||||||||||||||||||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,6 @@ | ||
| { | ||
| "name": "@pipedream/spider", | ||
| "version": "0.0.1", | ||
| "version": "0.1.0", | ||
| "description": "Pipedream Spider Components", | ||
| "main": "spider.app.mjs", | ||
| "keywords": [ | ||
|
|
@@ -11,5 +11,8 @@ | |
| "author": "Pipedream <[email protected]> (https://pipedream.com/)", | ||
| "publishConfig": { | ||
| "access": "public" | ||
| }, | ||
| "dependencies": { | ||
| "@pipedream/platform": "^3.0.3" | ||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -1,11 +1,32 @@ | ||||||||||||||
| import { axios } from "@pipedream/platform"; | ||||||||||||||
|
|
||||||||||||||
| export default { | ||||||||||||||
| type: "app", | ||||||||||||||
| app: "spider", | ||||||||||||||
| propDefinitions: {}, | ||||||||||||||
| methods: { | ||||||||||||||
| // this.$auth contains connected account data | ||||||||||||||
| authKeys() { | ||||||||||||||
| console.log(Object.keys(this.$auth)); | ||||||||||||||
| _baseUrl() { | ||||||||||||||
| return "https://api.spider.cloud"; | ||||||||||||||
| }, | ||||||||||||||
|
Comment on lines
+8
to
+10
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🛠️ Refactor suggestion
Consider using an environment variable for the base URL.
While the implementation is correct, hardcoding the base URL might make it difficult to change in the future, especially if there are different environments (e.g., staging, production). Consider using an environment variable:
```diff
 _baseUrl() {
-  return "https://api.spider.cloud";
+  return process.env.SPIDER_API_BASE_URL || "https://api.spider.cloud";
 },
```
This change would allow for easier configuration across different environments while maintaining the current URL as a default.
📝 Committable suggestion
Suggested change
|
||||||||||||||
| async _makeRequest({ | ||||||||||||||
| $ = this, path = "/", headers, ...otherOpts | ||||||||||||||
| } = {}) { | ||||||||||||||
| return axios($, { | ||||||||||||||
| ...otherOpts, | ||||||||||||||
| url: this._baseUrl() + path, | ||||||||||||||
| headers: { | ||||||||||||||
| ...headers, | ||||||||||||||
| "Authorization": `Bearer ${this.$auth.api_key}`, | ||||||||||||||
| "Content-Type": "application/json", | ||||||||||||||
| }, | ||||||||||||||
| }); | ||||||||||||||
| }, | ||||||||||||||
| async initiateCrawl(args) { | ||||||||||||||
| return this._makeRequest({ | ||||||||||||||
| method: "POST", | ||||||||||||||
| path: "/crawl", | ||||||||||||||
| ...args, | ||||||||||||||
| }); | ||||||||||||||
| }, | ||||||||||||||
| }, | ||||||||||||||
| }; | ||||||||||||||
| }; | ||||||||||||||
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add a default value to the `limit` prop to ensure it defaults to 0.
The `limit` prop is optional, but according to the description, it defaults to 0, which crawls all pages. Without specifying a default value in the prop definition, `this.limit` may be `undefined` when the action runs. Adding a `default` property will ensure it defaults to 0 when not specified by the user.
Apply this diff to set the default value for `limit`:
```diff
 limit: {
   type: "integer",
   label: "Limit",
   description: "The maximum amount of pages allowed to crawl per website. Default is 0, which crawls all pages.",
   optional: true,
+  default: 0,
 },
```
📝 Committable suggestion