diff --git a/components/scrapegraphai/actions/start-local-scraper/start-local-scraper.mjs b/components/scrapegraphai/actions/start-local-scraper/start-local-scraper.mjs
new file mode 100644
index 0000000000000..97c66f16ddf25
--- /dev/null
+++ b/components/scrapegraphai/actions/start-local-scraper/start-local-scraper.mjs
@@ -0,0 +1,62 @@
+import scrapegraphai from "../../scrapegraphai.app.mjs";
+
+export default {
+  key: "scrapegraphai-start-local-scraper",
+  name: "Start Local Scraper",
+  description: "Extract content from HTML content using AI by providing a natural language prompt and the HTML content. [See the documentation](https://docs.scrapegraphai.com/api-reference/endpoint/localscraper/start)",
+  version: "0.0.1",
+  type: "action",
+  props: {
+    scrapegraphai,
+    html: {
+      type: "string",
+      label: "HTML",
+      description: "The HTML to scrape",
+    },
+    prompt: {
+      propDefinition: [
+        scrapegraphai,
+        "prompt",
+      ],
+    },
+    waitForCompletion: {
+      propDefinition: [
+        scrapegraphai,
+        "waitForCompletion",
+      ],
+    },
+  },
+  // Starts the job and, if requested, polls its status until it is terminal.
+  async run({ $ }) {
+    let response = await this.scrapegraphai.startLocalScraper({
+      $,
+      data: {
+        website_html: this.html,
+        user_prompt: this.prompt,
+      },
+    });
+
+    if (this.waitForCompletion) {
+      // Take the ID from the start response so polling does not rely on each
+      // status payload echoing `request_id` back.
+      const requestId = response.request_id;
+      const timer = (ms) => new Promise((res) => setTimeout(res, ms));
+      while (response.status !== "completed" && response.status !== "failed") {
+        // Sleep before polling: no trailing 3s delay once the job has finished.
+        await timer(3000);
+        response = await this.scrapegraphai.getLocalScraperStatus({
+          $,
+          requestId,
+        });
+      }
+    }
+
+    // A failed job is returned as-is, without a success summary.
+    if (response.status !== "failed") {
+      $.export("$summary", `Successfully ${this.waitForCompletion
+        ? "completed"
+        : "started" } scraping HTML.`);
+    }
+    return response;
+  },
+};
diff --git a/components/scrapegraphai/actions/start-markdownify/start-markdownify.mjs b/components/scrapegraphai/actions/start-markdownify/start-markdownify.mjs
new file mode 100644
index 0000000000000..6b0702ebf5c2f
--- /dev/null
+++ b/components/scrapegraphai/actions/start-markdownify/start-markdownify.mjs
@@ -0,0 +1,57 @@
+import scrapegraphai from "../../scrapegraphai.app.mjs";
+
+export default {
+  key: "scrapegraphai-start-markdownify",
+  name: "Start Markdownify",
+  description: "Convert any webpage into clean, readable Markdown format. [See the documentation](https://docs.scrapegraphai.com/api-reference/endpoint/markdownify/start)",
+  version: "0.0.1",
+  type: "action",
+  props: {
+    scrapegraphai,
+    url: {
+      propDefinition: [
+        scrapegraphai,
+        "url",
+      ],
+      description: "The URL of the website to convert into markdown",
+    },
+    waitForCompletion: {
+      propDefinition: [
+        scrapegraphai,
+        "waitForCompletion",
+      ],
+    },
+  },
+  // Starts the job and, if requested, polls its status until it is terminal.
+  async run({ $ }) {
+    let response = await this.scrapegraphai.startMarkdownify({
+      $,
+      data: {
+        website_url: this.url,
+      },
+    });
+
+    if (this.waitForCompletion) {
+      // Take the ID from the start response so polling does not rely on each
+      // status payload echoing `request_id` back.
+      const requestId = response.request_id;
+      const timer = (ms) => new Promise((res) => setTimeout(res, ms));
+      while (response.status !== "completed" && response.status !== "failed") {
+        // Sleep before polling: no trailing 3s delay once the job has finished.
+        await timer(3000);
+        response = await this.scrapegraphai.getMarkdownifyStatus({
+          $,
+          requestId,
+        });
+      }
+    }
+
+    // A failed job is returned as-is, without a success summary.
+    if (response.status !== "failed") {
+      $.export("$summary", `Successfully ${this.waitForCompletion
+        ? "completed"
+        : "started" } converting ${this.url} to markdown.`);
+    }
+    return response;
+  },
+};
diff --git a/components/scrapegraphai/actions/start-smart-scraper/start-smart-scraper.mjs b/components/scrapegraphai/actions/start-smart-scraper/start-smart-scraper.mjs
new file mode 100644
index 0000000000000..65777516228c8
--- /dev/null
+++ b/components/scrapegraphai/actions/start-smart-scraper/start-smart-scraper.mjs
@@ -0,0 +1,63 @@
+import scrapegraphai from "../../scrapegraphai.app.mjs";
+
+export default {
+  key: "scrapegraphai-start-smart-scraper",
+  name: "Start Smart Scraper",
+  description: "Extract content from a webpage using AI by providing a natural language prompt and a URL. [See the documentation](https://docs.scrapegraphai.com/api-reference/endpoint/smartscraper/start).",
+  version: "0.0.1",
+  type: "action",
+  props: {
+    scrapegraphai,
+    url: {
+      propDefinition: [
+        scrapegraphai,
+        "url",
+      ],
+    },
+    prompt: {
+      propDefinition: [
+        scrapegraphai,
+        "prompt",
+      ],
+    },
+    waitForCompletion: {
+      propDefinition: [
+        scrapegraphai,
+        "waitForCompletion",
+      ],
+    },
+  },
+  // Starts the job and, if requested, polls its status until it is terminal.
+  async run({ $ }) {
+    let response = await this.scrapegraphai.startSmartScraper({
+      $,
+      data: {
+        website_url: this.url,
+        user_prompt: this.prompt,
+      },
+    });
+
+    if (this.waitForCompletion) {
+      // Take the ID from the start response so polling does not rely on each
+      // status payload echoing `request_id` back.
+      const requestId = response.request_id;
+      const timer = (ms) => new Promise((res) => setTimeout(res, ms));
+      while (response.status !== "completed" && response.status !== "failed") {
+        // Sleep before polling: no trailing 3s delay once the job has finished.
+        await timer(3000);
+        response = await this.scrapegraphai.getSmartScraperStatus({
+          $,
+          requestId,
+        });
+      }
+    }
+
+    // A failed job is returned as-is, without a success summary.
+    if (response.status !== "failed") {
+      $.export("$summary", `Successfully ${this.waitForCompletion
+        ? "completed"
+        : "started" } scraping ${this.url}.`);
+    }
+    return response;
+  },
+};
diff --git a/components/scrapegraphai/package.json b/components/scrapegraphai/package.json
index 9a824f842637b..ee342f5acde98 100644
--- a/components/scrapegraphai/package.json
+++ b/components/scrapegraphai/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@pipedream/scrapegraphai",
-  "version": "0.0.1",
+  "version": "0.1.0",
   "description": "Pipedream ScrapeGraphAI Components",
   "main": "scrapegraphai.app.mjs",
   "keywords": [
@@ -11,5 +11,8 @@
   "author": "Pipedream (https://pipedream.com/)",
   "publishConfig": {
     "access": "public"
+  },
+  "dependencies": {
+    "@pipedream/platform": "^3.0.3"
   }
-}
\ No newline at end of file
+}
diff --git a/components/scrapegraphai/scrapegraphai.app.mjs b/components/scrapegraphai/scrapegraphai.app.mjs
index 17f74407bfa8c..a35f9ca91450d 100644
--- a/components/scrapegraphai/scrapegraphai.app.mjs
+++ b/components/scrapegraphai/scrapegraphai.app.mjs
@@ -1,11 +1,89 @@
+import { axios } from "@pipedream/platform";
+
 export default {
   type: "app",
   app: "scrapegraphai",
-  propDefinitions: {},
+  propDefinitions: {
+    url: {
+      type: "string",
+      label: "URL to Scrape",
+      description: "The URL of the website to scrape.",
+    },
+    prompt: {
+      type: "string",
+      label: "Prompt",
+      description: "A prompt describing what you want to extract. Example: `Extract info about the company`",
+    },
+    waitForCompletion: {
+      type: "boolean",
+      label: "Wait For Completion",
+      description: "Set to `true` to poll the API in 3-second intervals until the request is completed",
+      optional: true,
+    },
+  },
   methods: {
-    // this.$auth contains connected account data
-    authKeys() {
-      console.log(Object.keys(this.$auth));
+    _baseUrl() {
+      return "https://api.scrapegraphai.com/v1";
+    },
+    // Requests `path` under the base URL, setting the `sgai-apikey` header from
+    // `this.$auth.api_key`; all remaining options are spread into the axios config.
+    _makeRequest({
+      $ = this,
+      path,
+      ...opts
+    }) {
+      return axios($, {
+        url: `${this._baseUrl()}${path}`,
+        headers: {
+          "sgai-apikey": `${this.$auth.api_key}`,
+        },
+        ...opts,
+      });
+    },
+    startSmartScraper(opts = {}) {
+      return this._makeRequest({
+        method: "POST",
+        path: "/smartscraper",
+        ...opts,
+      });
+    },
+    getSmartScraperStatus({
+      requestId, ...opts
+    }) {
+      return this._makeRequest({
+        path: `/smartscraper/${requestId}`,
+        ...opts,
+      });
+    },
+    startLocalScraper(opts = {}) {
+      return this._makeRequest({
+        method: "POST",
+        path: "/localscraper",
+        ...opts,
+      });
+    },
+    getLocalScraperStatus({
+      requestId, ...opts
+    }) {
+      return this._makeRequest({
+        path: `/localscraper/${requestId}`,
+        ...opts,
+      });
+    },
+    startMarkdownify(opts = {}) {
+      return this._makeRequest({
+        method: "POST",
+        path: "/markdownify",
+        ...opts,
+      });
+    },
+    getMarkdownifyStatus({
+      requestId, ...opts
+    }) {
+      return this._makeRequest({
+        path: `/markdownify/${requestId}`,
+        ...opts,
+      });
     },
   },
 };
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 8b0526870c357..3d040bc1629c0 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -8672,8 +8672,7 @@ importers:
         specifier: ^1.5.1
         version: 1.6.6
 
-  components/richpanel:
-    specifiers: {}
+  components/richpanel: {}
 
   components/ringcentral:
     dependencies:
@@ -8953,7 +8952,11 @@ importers:
         specifier: ^1.4.1
         version: 1.6.6
 
-  components/scrapegraphai: {}
+  components/scrapegraphai:
+    dependencies:
+      '@pipedream/platform':
+        specifier: ^3.0.3
+        version: 3.0.3
 
   components/scrapein_: {}
 