Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 69 additions & 0 deletions components/webscrape_ai/actions/scrape-website/scrape-website.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import webscrapeAi from "../../webscrape_ai.app.mjs";

export default {
key: "webscrape_ai-scrape-website",
name: "Scrape Website",
description: "Scrape the provided URL and store the results in the system. [See the documentation](https://webscrapeai.com/docs)",
version: "0.0.1",
type: "action",
props: {
webscrapeAi,
alert: {

Check warning on line 11 in components/webscrape_ai/actions/scrape-website/scrape-website.mjs

View workflow job for this annotation

GitHub Actions / Lint Code Base

Component prop alert must have a description. See https://pipedream.com/docs/components/guidelines/#props

Check warning on line 11 in components/webscrape_ai/actions/scrape-website/scrape-website.mjs

View workflow job for this annotation

GitHub Actions / Lint Code Base

Component prop alert must have a label. See https://pipedream.com/docs/components/guidelines/#props
type: "alert",
alertType: "info",
content: "This actions sends a synchronous request to the WebScrapeAI API and may require increasing the workflow's default timeout.",
},
// Required. Forwarded by run() as the `url` request parameter.
url: {
type: "string",
label: "URL",
description: "The URL of the website to scrape",
},
// Required. Free-text description of the data to extract; forwarded as `command`.
command: {
type: "string",
label: "Command",
description: "The data you want to extract. E.g. `I want to extract all the news details`",
},
// Required. Declared as a string holding a JSON description of the fields;
// run() serializes it with JSON.stringify if it is received as an object instead.
schema: {
type: "string",
label: "Schema",
description: "Schema representing the fields you want to scrape. E.g. `{\"author\":\"string\",\"comments_count\":\"integer\",\"points\":\"integer\",\"posted_time\":\"string\",\"title\":\"string\",\"url\":\"url\"}`",
},
// Optional. Forwarded as `pages`; no default is applied in this component,
// the default of 1 mentioned in the description is left to the API.
pages: {
type: "integer",
label: "Pages",
description: "Number of pages to scrape. Default value is 1.",
optional: true,
},
// Optional. Forwarded as-is as the `headers` request parameter (a single string).
headers: {
type: "string",
label: "Headers",
description: "List of headers in key-value pairs. i.e `Accept: application/json`",
optional: true,
},
// Optional. Forwarded as-is as the `instructions` request parameter.
instructions: {
type: "string",
label: "Instructions",
description: "List of JavaScript instructions that you want to execute, like clicking a specific button, waiting for a specific code block to appear, etc. Example: `{\"click\": \"#button_id\"}`. [See the documentation](https://webscrapeai.com/docs) for more information.",
optional: true,
},
},
async run({ $ }) {
const response = await this.webscrapeAi.scrapeWebsite({
$,
params: {
url: this.url,
command: this.command,
schema: typeof this.schema === "object"
? JSON.stringify(this.schema)
: this.schema,
pages: this.pages,
headers: this.headers,
instructions: this.instructions,
},
});
$.export("$summary", `Scraped ${this.url} and got ${response.length} result${response.length === 1
? ""
: "s"}`);
return response;
},
};
7 changes: 5 additions & 2 deletions components/webscrape_ai/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@pipedream/webscrape_ai",
"version": "0.0.1",
"version": "0.1.0",
"description": "Pipedream Webscrape AI Components",
"main": "webscrape_ai.app.mjs",
"keywords": [
Expand All @@ -11,5 +11,8 @@
"author": "Pipedream <[email protected]> (https://pipedream.com/)",
"publishConfig": {
"access": "public"
},
"dependencies": {
"@pipedream/platform": "^3.1.0"
}
}
}
27 changes: 23 additions & 4 deletions components/webscrape_ai/webscrape_ai.app.mjs
Original file line number Diff line number Diff line change
@@ -1,11 +1,30 @@
import { axios } from "@pipedream/platform";

export default {
type: "app",
app: "webscrape_ai",
propDefinitions: {},
methods: {
// this.$auth contains connected account data
authKeys() {
console.log(Object.keys(this.$auth));
_baseUrl() {
return "https://api.webscrapeai.com";
},
_makeRequest({
$ = this, path, params, ...opts
}) {
return axios($, {
url: `${this._baseUrl()}${path}`,
params: {
...params,
apiKey: `${this.$auth.api_key}`,
},
...opts,
});
},
scrapeWebsite(opts = {}) {
return this._makeRequest({
path: "/scrapeWebSite",
...opts,
});
},
},
};
};
11 changes: 8 additions & 3 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading