From 20fef2e62b5eacbbd92e49fc586dda6e8d32b588 Mon Sep 17 00:00:00 2001 From: GTFalcao Date: Fri, 11 Oct 2024 12:45:02 -0300 Subject: [PATCH 1/5] Initial AI-generated code --- .../scrape-new-page/scrape-new-page.mjs | 23 ++++++++++ components/spider/package.json | 2 +- components/spider/spider.app.mjs | 43 ++++++++++++++++++- 3 files changed, 65 insertions(+), 3 deletions(-) create mode 100644 components/spider/actions/scrape-new-page/scrape-new-page.mjs diff --git a/components/spider/actions/scrape-new-page/scrape-new-page.mjs b/components/spider/actions/scrape-new-page/scrape-new-page.mjs new file mode 100644 index 0000000000000..d11ef8a0b6a9b --- /dev/null +++ b/components/spider/actions/scrape-new-page/scrape-new-page.mjs @@ -0,0 +1,23 @@ +import spider from "../../spider.app.mjs"; + +export default { + key: "spider-scrape-new-page", + name: "Scrape New Page", + description: "Initiates a new page scrape. [See the documentation](https://spider.cloud/docs/api)", + version: "0.0.{{ts}}", + type: "action", + props: { + spider, + url: { + propDefinition: [ + spider, + "url", + ], + }, + }, + async run({ $ }) { + const content = await this.spider.initiateCrawl(); + $.export("$summary", `Successfully scraped content from ${this.url}`); + return content; + }, +}; diff --git a/components/spider/package.json b/components/spider/package.json index cc316e1ccfb32..f9993feed75cb 100644 --- a/components/spider/package.json +++ b/components/spider/package.json @@ -12,4 +12,4 @@ "publishConfig": { "access": "public" } -} \ No newline at end of file +} diff --git a/components/spider/spider.app.mjs b/components/spider/spider.app.mjs index 98004efd50fdd..d5614d3834ff3 100644 --- a/components/spider/spider.app.mjs +++ b/components/spider/spider.app.mjs @@ -1,11 +1,50 @@ +import { axios } from "@pipedream/platform"; + export default { type: "app", app: "spider", - propDefinitions: {}, + propDefinitions: { + url: { + type: "string", + label: "URL to Scrape", + description: "The URL of the page to scrape.", + }, + }, methods: { // this.$auth contains connected account data authKeys() { console.log(Object.keys(this.$auth)); }, + _baseUrl() { + return "https://api.spider.cloud"; + }, + async _makeRequest(opts = {}) { + const { + $ = this, method = "GET", path = "/", headers, ...otherOpts + } = opts; + return axios($, { + ...otherOpts, + method, + url: this._baseUrl() + path, + headers: { + ...headers, + "Authorization": `Bearer ${this.$auth.api_key}`, + "Content-Type": "application/json", + }, + }); + }, + async initiateCrawl() { + const response = await this._makeRequest({ + method: "POST", + path: "/crawl", + data: { + url: this.url, + }, + }); + if (Array.isArray(response) && response.length > 0) { + return response[0].content; + } + throw new Error("No content returned from crawl"); + }, }, -}; \ No newline at end of file +}; From 3adb077c126d216d6ea700d0712268d52560f8bd Mon Sep 17 00:00:00 2001 From: GTFalcao Date: Mon, 14 Oct 2024 02:56:58 -0300 Subject: [PATCH 2/5] Package and app file updates --- components/spider/package.json | 5 ++++- components/spider/spider.app.mjs | 32 +++++++------------------------- 2 files changed, 11 insertions(+), 26 deletions(-) diff --git a/components/spider/package.json b/components/spider/package.json index f9993feed75cb..6f467bb3f29fe 100644 --- a/components/spider/package.json +++ b/components/spider/package.json @@ -1,6 +1,6 @@ { "name": "@pipedream/spider", - "version": "0.0.1", + "version": "0.1.0", "description": "Pipedream Spider Components", "main": "spider.app.mjs", "keywords": [ @@ -11,5 +11,8 @@ "author": "Pipedream (https://pipedream.com/)", "publishConfig": { "access": "public" + }, + "dependencies": { + "@pipedream/platform": "^3.0.3" } } diff --git a/components/spider/spider.app.mjs b/components/spider/spider.app.mjs index d5614d3834ff3..115ffb6352914 100644 --- a/components/spider/spider.app.mjs +++ b/components/spider/spider.app.mjs @@ -3,28 +3,16 @@ import { axios } from "@pipedream/platform"; export default { type: "app", app: "spider", - propDefinitions: { - url: { - type: "string", - label: "URL to Scrape", - description: "The URL of the page to scrape.", - }, - }, + propDefinitions: {}, methods: { - // this.$auth contains connected account data - authKeys() { - console.log(Object.keys(this.$auth)); - }, _baseUrl() { return "https://api.spider.cloud"; }, - async _makeRequest(opts = {}) { - const { - $ = this, method = "GET", path = "/", headers, ...otherOpts - } = opts; + async _makeRequest({ + $ = this, path = "/", headers, ...otherOpts + } = {}) { return axios($, { ...otherOpts, - method, url: this._baseUrl() + path, headers: { ...headers, @@ -33,18 +21,12 @@ export default { }, }); }, - async initiateCrawl() { - const response = await this._makeRequest({ + async initiateCrawl(args) { + return this._makeRequest({ method: "POST", path: "/crawl", - data: { - url: this.url, - }, + ...args, }); - if (Array.isArray(response) && response.length > 0) { - return response[0].content; - } - throw new Error("No content returned from crawl"); }, }, }; From 991e3b80a2e88ad36414467a692343269f7d5728 Mon Sep 17 00:00:00 2001 From: GTFalcao Date: Mon, 14 Oct 2024 02:58:52 -0300 Subject: [PATCH 3/5] Action adjustments --- .../scrape-new-page/scrape-new-page.mjs | 37 +++++++++++++++---- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/components/spider/actions/scrape-new-page/scrape-new-page.mjs b/components/spider/actions/scrape-new-page/scrape-new-page.mjs index d11ef8a0b6a9b..83bf2b3ee59db 100644 --- a/components/spider/actions/scrape-new-page/scrape-new-page.mjs +++ b/components/spider/actions/scrape-new-page/scrape-new-page.mjs @@ -3,21 +3,44 @@ import spider from "../../spider.app.mjs"; export default { key: "spider-scrape-new-page", name: "Scrape New Page", - description: "Initiates a new page scrape. [See the documentation](https://spider.cloud/docs/api)", + description: "Initiates a new page scrape (crawl). [See the documentation](https://spider.cloud/docs/api#crawl-website)", version: "0.0.{{ts}}", type: "action", props: { spider, + infoBox: { + type: "alert", + alertType: "info", + content: "See [the Spider documentation](https://spider.cloud/docs/api#crawl-website) for information on limits and best practices.", + }, url: { - propDefinition: [ - spider, - "url", - ], + type: "string", + label: "URL", + description: "The URI resource to crawl, e.g. `https://spider.cloud`. This can be a comma split list for multiple urls.", + }, + limit: { + type: "integer", + label: "Limit", + description: "The maximum amount of pages allowed to crawl per website. Default is 0, which crawls all pages.", + optional: true, + }, + storeData: { + type: "boolean", + label: "Store Data", + description: "Decide whether to store data. Default is `false`.", + optional: true, }, }, async run({ $ }) { - const content = await this.spider.initiateCrawl(); - $.export("$summary", `Successfully scraped content from ${this.url}`); + const content = await this.spider.initiateCrawl({ + $, + data: { + url: this.url, + limit: this.limit, + store_data: this.storeData, + }, + }); + $.export("$summary", `Successfully scraped URL ${this.url}`); return content; }, }; From 169eb0c67b81455de211a0fb4e10e1c0588dfa6f Mon Sep 17 00:00:00 2001 From: GTFalcao Date: Mon, 14 Oct 2024 02:59:42 -0300 Subject: [PATCH 4/5] pnpm --- pnpm-lock.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index ed825eed80aef..5f23952045cb1 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -9396,7 +9396,10 @@ importers: form-data: 4.0.0 components/spider: - specifiers: {} + specifiers: + '@pipedream/platform': ^3.0.3 + dependencies: + '@pipedream/platform': 3.0.3 components/spiritme: specifiers: From 7ed07d5296a42056a09d7f18991d55f3e42f3f4e Mon Sep 17 00:00:00 2001 From: GTFalcao Date: Tue, 15 Oct 2024 14:36:16 -0300 Subject: [PATCH 5/5] fix --- components/spider/actions/scrape-new-page/scrape-new-page.mjs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/spider/actions/scrape-new-page/scrape-new-page.mjs b/components/spider/actions/scrape-new-page/scrape-new-page.mjs index 83bf2b3ee59db..8aa9325d9c445 100644 --- a/components/spider/actions/scrape-new-page/scrape-new-page.mjs +++ b/components/spider/actions/scrape-new-page/scrape-new-page.mjs @@ -4,7 +4,7 @@ export default { key: "spider-scrape-new-page", name: "Scrape New Page", description: "Initiates a new page scrape (crawl). [See the documentation](https://spider.cloud/docs/api#crawl-website)", - version: "0.0.{{ts}}", + version: "0.0.1", type: "action", props: { spider,