Add a "Scrape New Page" action to the Spider component.

* components/spider/actions/scrape-new-page/scrape-new-page.mjs (new file):
  defines the action `spider-scrape-new-page`. Its `run` passes the `url`,
  `limit` and `storeData` props (as `url`, `limit`, `store_data`) to
  `this.spider.initiateCrawl`, exports a `$summary` naming the URL, and
  returns the response.
* components/spider/package.json: bumps the version from 0.0.1 to 0.1.0, adds
  the `@pipedream/platform` ^3.0.3 dependency, and ends the file with a
  newline.
* components/spider/spider.app.mjs: replaces the placeholder `authKeys`
  method with `_baseUrl`, `_makeRequest` (calls `axios` from
  `@pipedream/platform` with a Bearer token taken from `this.$auth.api_key`
  and a JSON content type; these two headers are spread last, so they
  override caller-supplied values) and `initiateCrawl` (POST to `/crawl`).
* pnpm-lock.yaml: records the `@pipedream/platform` dependency for
  components/spider.

diff --git a/components/spider/actions/scrape-new-page/scrape-new-page.mjs b/components/spider/actions/scrape-new-page/scrape-new-page.mjs
new file mode 100644
index 0000000000000..8aa9325d9c445
--- /dev/null
+++ b/components/spider/actions/scrape-new-page/scrape-new-page.mjs
@@ -0,0 +1,46 @@
+import spider from "../../spider.app.mjs";
+
+export default {
+  key: "spider-scrape-new-page",
+  name: "Scrape New Page",
+  description: "Initiates a new page scrape (crawl). [See the documentation](https://spider.cloud/docs/api#crawl-website)",
+  version: "0.0.1",
+  type: "action",
+  props: {
+    spider,
+    infoBox: {
+      type: "alert",
+      alertType: "info",
+      content: "See [the Spider documentation](https://spider.cloud/docs/api#crawl-website) for information on limits and best practices.",
+    },
+    url: {
+      type: "string",
+      label: "URL",
+      description: "The URI resource to crawl, e.g. `https://spider.cloud`. This can be a comma split list for multiple urls.",
+    },
+    limit: {
+      type: "integer",
+      label: "Limit",
+      description: "The maximum amount of pages allowed to crawl per website. Default is 0, which crawls all pages.",
+      optional: true,
+    },
+    storeData: {
+      type: "boolean",
+      label: "Store Data",
+      description: "Decide whether to store data. Default is `false`.",
+      optional: true,
+    },
+  },
+  async run({ $ }) {
+    const content = await this.spider.initiateCrawl({
+      $,
+      data: {
+        url: this.url,
+        limit: this.limit,
+        store_data: this.storeData,
+      },
+    });
+    $.export("$summary", `Successfully scraped URL ${this.url}`);
+    return content;
+  },
+};
diff --git a/components/spider/package.json b/components/spider/package.json
index cc316e1ccfb32..6f467bb3f29fe 100644
--- a/components/spider/package.json
+++ b/components/spider/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@pipedream/spider",
-  "version": "0.0.1",
+  "version": "0.1.0",
   "description": "Pipedream Spider Components",
   "main": "spider.app.mjs",
   "keywords": [
@@ -11,5 +11,8 @@
   "author": "Pipedream (https://pipedream.com/)",
   "publishConfig": {
     "access": "public"
+  },
+  "dependencies": {
+    "@pipedream/platform": "^3.0.3"
   }
-}
\ No newline at end of file
+}
diff --git a/components/spider/spider.app.mjs b/components/spider/spider.app.mjs
index 98004efd50fdd..115ffb6352914 100644
--- a/components/spider/spider.app.mjs
+++ b/components/spider/spider.app.mjs
@@ -1,11 +1,32 @@
+import { axios } from "@pipedream/platform";
+
 export default {
   type: "app",
   app: "spider",
   propDefinitions: {},
   methods: {
-    // this.$auth contains connected account data
-    authKeys() {
-      console.log(Object.keys(this.$auth));
+    _baseUrl() {
+      return "https://api.spider.cloud";
+    },
+    async _makeRequest({
+      $ = this, path = "/", headers, ...otherOpts
+    } = {}) {
+      return axios($, {
+        ...otherOpts,
+        url: this._baseUrl() + path,
+        headers: {
+          ...headers,
+          "Authorization": `Bearer ${this.$auth.api_key}`,
+          "Content-Type": "application/json",
+        },
+      });
+    },
+    async initiateCrawl(args) {
+      return this._makeRequest({
+        method: "POST",
+        path: "/crawl",
+        ...args,
+      });
     },
   },
-};
\ No newline at end of file
+};
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index ed825eed80aef..5f23952045cb1 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -9396,7 +9396,10 @@ importers:
       form-data: 4.0.0
 
   components/spider:
-    specifiers: {}
+    specifiers:
+      '@pipedream/platform': ^3.0.3
+    dependencies:
+      '@pipedream/platform': 3.0.3
 
   components/spiritme:
     specifiers: