Skip to content

Commit 1813f38

Browse files
authored
Merging pull request #17582
* new component * pnpm-lock.yaml * fix key
1 parent 1e4f62a commit 1813f38

File tree

4 files changed

+101
-6
lines changed

4 files changed

+101
-6
lines changed
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
import webscrapeAi from "../../webscrape_ai.app.mjs";
2+
3+
/**
 * Pipedream action "Scrape Website".
 *
 * Sends the configured URL, extraction command, schema and optional settings
 * to the WebScrapeAI app's `scrapeWebsite` method and returns whatever that
 * call resolves to. A short `$summary` is exported for the workflow UI.
 */
export default {
  key: "webscrape_ai-scrape-website",
  name: "Scrape Website",
  description: "Scrape the provided URL and store the results in the system. [See the documentation](https://webscrapeai.com/docs)",
  version: "0.0.1",
  type: "action",
  props: {
    webscrapeAi,
    alert: {
      type: "alert",
      alertType: "info",
      content: "This action sends a synchronous request to the WebScrapeAI API and may require increasing the workflow's default timeout.",
    },
    url: {
      type: "string",
      label: "URL",
      description: "The URL of the website to scrape",
    },
    command: {
      type: "string",
      label: "Command",
      description: "The data you want to extract. E.g. `I want to extract all the news details`",
    },
    schema: {
      type: "string",
      label: "Schema",
      description: "Schema representing the fields you want to scrape. E.g. `{\"author\":\"string\",\"comments_count\":\"integer\",\"points\":\"integer\",\"posted_time\":\"string\",\"title\":\"string\",\"url\":\"url\"}`",
    },
    pages: {
      type: "integer",
      label: "Pages",
      description: "Number of pages to scrape. Default value is 1.",
      optional: true,
    },
    headers: {
      type: "string",
      label: "Headers",
      description: "List of headers in key-value pairs. i.e `Accept: application/json`",
      optional: true,
    },
    instructions: {
      type: "string",
      label: "Instructions",
      description: "List of JavaScript instructions that you want to execute, like clicking a specific button, waiting for a specific code block to appear, etc. Example: `{\"click\": \"#button_id\"}`. [See the documentation](https://webscrapeai.com/docs) for more information.",
      optional: true,
    },
  },
  /**
   * Runs the scrape request.
   *
   * @param {object} context - Pipedream run context.
   * @param {object} context.$ - Step object; forwarded to the app request and
   *   used to export the `$summary`.
   * @returns {Promise<*>} The value resolved by `webscrapeAi.scrapeWebsite`.
   */
  async run({ $ }) {
    // String props may arrive as objects (the schema prop was already guarded
    // for this - presumably when a workflow expression resolves to parsed
    // JSON). Serialize every JSON-like prop the same way so the query string
    // always carries a JSON string; strings and unset values pass through.
    const toParam = (value) => (value !== null && typeof value === "object"
      ? JSON.stringify(value)
      : value);

    const response = await this.webscrapeAi.scrapeWebsite({
      $,
      params: {
        url: this.url,
        command: this.command,
        schema: toParam(this.schema),
        pages: this.pages,
        headers: toParam(this.headers),
        instructions: toParam(this.instructions),
      },
    });

    // Only report a result count when the response is actually a list;
    // otherwise `.length` would be undefined (objects) or a character count
    // (strings) and the summary would be misleading.
    const summary = Array.isArray(response)
      ? `Scraped ${this.url} and got ${response.length} result${response.length === 1
        ? ""
        : "s"}`
      : `Scraped ${this.url}`;
    $.export("$summary", summary);
    return response;
  },
};

components/webscrape_ai/package.json

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@pipedream/webscrape_ai",
3-
"version": "0.0.1",
3+
"version": "0.1.0",
44
"description": "Pipedream Webscrape AI Components",
55
"main": "webscrape_ai.app.mjs",
66
"keywords": [
@@ -11,5 +11,8 @@
1111
"author": "Pipedream <[email protected]> (https://pipedream.com/)",
1212
"publishConfig": {
1313
"access": "public"
14+
},
15+
"dependencies": {
16+
"@pipedream/platform": "^3.1.0"
1417
}
15-
}
18+
}
Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,30 @@
1+
import { axios } from "@pipedream/platform";
2+
13
/**
 * Pipedream app definition for WebScrapeAI.
 *
 * Exposes a small request helper that authenticates every call with the
 * connected account's API key, plus one method per supported endpoint.
 */
export default {
  type: "app",
  app: "webscrape_ai",
  propDefinitions: {},
  methods: {
    /**
     * @returns {string} Root URL that request paths are appended to.
     */
    _baseUrl() {
      return "https://api.webscrapeai.com";
    },
    /**
     * Issues a request through the platform `axios` helper.
     *
     * @param {object} args
     * @param {object} [args.$=this] - Context handed to `axios` as its first argument.
     * @param {string} args.path - Path appended to the base URL.
     * @param {object} [args.params] - Query parameters; `apiKey` is added after
     *   them, so a caller-supplied `apiKey` is overwritten.
     * @param {object} [args.opts] - Remaining options, spread last into the
     *   request config (they can therefore override `url` and `params`).
     * @returns {*} Whatever the `axios` helper returns.
     */
    _makeRequest({
      $ = this, path, params, ...opts
    }) {
      const url = `${this._baseUrl()}${path}`;
      const query = {
        ...params,
        apiKey: `${this.$auth.api_key}`,
      };
      const config = {
        url,
        params: query,
        ...opts,
      };
      return axios($, config);
    },
    /**
     * Calls the `/scrapeWebSite` endpoint.
     *
     * @param {object} [opts={}] - Options forwarded to `_makeRequest`; spread
     *   after `path`, so a caller-supplied `path` takes precedence.
     * @returns {*} The result of `_makeRequest`.
     */
    scrapeWebsite(opts = {}) {
      const request = {
        path: "/scrapeWebSite",
        ...opts,
      };
      return this._makeRequest(request);
    },
  },
};

pnpm-lock.yaml

Lines changed: 5 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)