Skip to content

Commit 306d2d8

Browse files
authored
New Components - scrapegraphai (#15106)
* scrapegraphai init * new components * pnpm-lock.yaml
1 parent e6e6ffe commit 306d2d8

File tree

6 files changed

+255
-9
lines changed

6 files changed

+255
-9
lines changed
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
import scrapegraphai from "../../scrapegraphai.app.mjs";
2+
3+
/**
 * Pipedream action that sends raw HTML and a natural-language prompt to the
 * ScrapeGraphAI local scraper endpoint and, optionally, polls until the
 * request reaches a terminal status.
 */
export default {
  key: "scrapegraphai-start-local-scraper",
  name: "Start Local Scraper",
  description: "Extract content from HTML content using AI by providing a natural language prompt and the HTML content. [See the documentation](https://docs.scrapegraphai.com/api-reference/endpoint/localscraper/start)",
  version: "0.0.1",
  type: "action",
  props: {
    scrapegraphai,
    html: {
      type: "string",
      label: "HTML",
      description: "The HTML to scrape",
    },
    prompt: {
      propDefinition: [
        scrapegraphai,
        "prompt",
      ],
    },
    waitForCompletion: {
      propDefinition: [
        scrapegraphai,
        "waitForCompletion",
      ],
    },
  },
  /**
   * Starts the scrape and returns the latest API response.
   *
   * When `waitForCompletion` is set, the status endpoint is polled every
   * 3 seconds until the status is "completed" or "failed". A summary is
   * exported only when the final status is not "failed".
   *
   * @param {object} context - Pipedream run context; `$` is forwarded to the
   *   app's request helpers and used to export the summary.
   * @returns {Promise<object>} The start response, or the last status response
   *   when polling was requested.
   */
  async run({ $ }) {
    let response = await this.scrapegraphai.startLocalScraper({
      $,
      data: {
        website_html: this.html,
        user_prompt: this.prompt,
      },
    });

    if (this.waitForCompletion) {
      const pollIntervalMs = 3000;
      const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
      const isFinished = ({ status }) => status === "completed" || status === "failed";
      // Capture the ID once so polling keeps working even if a status
      // response does not echo `request_id` back.
      const requestId = response.request_id;

      while (!isFinished(response)) {
        // Wait *before* each poll: the loop then exits as soon as a terminal
        // status is seen instead of sleeping one extra interval afterwards.
        await sleep(pollIntervalMs);
        response = await this.scrapegraphai.getLocalScraperStatus({
          $,
          requestId,
        });
      }
    }

    if (response.status !== "failed") {
      const verb = this.waitForCompletion
        ? "completed"
        : "started";
      $.export("$summary", `Successfully ${verb} scraping HTML.`);
    }
    return response;
  },
};
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
import scrapegraphai from "../../scrapegraphai.app.mjs";
2+
3+
/**
 * Pipedream action that asks ScrapeGraphAI to convert a webpage into Markdown
 * and, optionally, polls until the request reaches a terminal status.
 */
export default {
  key: "scrapegraphai-start-markdownify",
  name: "Start Markdownify",
  description: "Convert any webpage into clean, readable Markdown format. [See the documentation](https://docs.scrapegraphai.com/api-reference/endpoint/markdownify/start)",
  version: "0.0.1",
  type: "action",
  props: {
    scrapegraphai,
    url: {
      propDefinition: [
        scrapegraphai,
        "url",
      ],
      description: "The URL of the website to convert into markdown",
    },
    waitForCompletion: {
      propDefinition: [
        scrapegraphai,
        "waitForCompletion",
      ],
    },
  },
  /**
   * Starts the conversion and returns the latest API response.
   *
   * When `waitForCompletion` is set, the status endpoint is polled every
   * 3 seconds until the status is "completed" or "failed". A summary is
   * exported only when the final status is not "failed".
   *
   * @param {object} context - Pipedream run context; `$` is forwarded to the
   *   app's request helpers and used to export the summary.
   * @returns {Promise<object>} The start response, or the last status response
   *   when polling was requested.
   */
  async run({ $ }) {
    let response = await this.scrapegraphai.startMarkdownify({
      $,
      data: {
        website_url: this.url,
      },
    });

    if (this.waitForCompletion) {
      const pollIntervalMs = 3000;
      const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
      const isFinished = ({ status }) => status === "completed" || status === "failed";
      // Capture the ID once so polling keeps working even if a status
      // response does not echo `request_id` back.
      const requestId = response.request_id;

      while (!isFinished(response)) {
        // Wait *before* each poll: the loop then exits as soon as a terminal
        // status is seen instead of sleeping one extra interval afterwards.
        await sleep(pollIntervalMs);
        response = await this.scrapegraphai.getMarkdownifyStatus({
          $,
          requestId,
        });
      }
    }

    if (response.status !== "failed") {
      const verb = this.waitForCompletion
        ? "completed"
        : "started";
      $.export("$summary", `Successfully ${verb} converting ${this.url} to markdown.`);
    }
    return response;
  },
};
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
import scrapegraphai from "../../scrapegraphai.app.mjs";
2+
3+
/**
 * Pipedream action that sends a URL and a natural-language prompt to the
 * ScrapeGraphAI smart scraper endpoint and, optionally, polls until the
 * request reaches a terminal status.
 */
export default {
  key: "scrapegraphai-start-smart-scraper",
  name: "Start Smart Scraper",
  description: "Extract content from a webpage using AI by providing a natural language prompt and a URL. [See the documentation](https://docs.scrapegraphai.com/api-reference/endpoint/smartscraper/start).",
  version: "0.0.1",
  type: "action",
  props: {
    scrapegraphai,
    url: {
      propDefinition: [
        scrapegraphai,
        "url",
      ],
    },
    prompt: {
      propDefinition: [
        scrapegraphai,
        "prompt",
      ],
    },
    waitForCompletion: {
      propDefinition: [
        scrapegraphai,
        "waitForCompletion",
      ],
    },
  },
  /**
   * Starts the scrape and returns the latest API response.
   *
   * When `waitForCompletion` is set, the status endpoint is polled every
   * 3 seconds until the status is "completed" or "failed". A summary is
   * exported only when the final status is not "failed".
   *
   * @param {object} context - Pipedream run context; `$` is forwarded to the
   *   app's request helpers and used to export the summary.
   * @returns {Promise<object>} The start response, or the last status response
   *   when polling was requested.
   */
  async run({ $ }) {
    let response = await this.scrapegraphai.startSmartScraper({
      $,
      data: {
        website_url: this.url,
        user_prompt: this.prompt,
      },
    });

    if (this.waitForCompletion) {
      const pollIntervalMs = 3000;
      const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
      const isFinished = ({ status }) => status === "completed" || status === "failed";
      // Capture the ID once so polling keeps working even if a status
      // response does not echo `request_id` back.
      const requestId = response.request_id;

      while (!isFinished(response)) {
        // Wait *before* each poll: the loop then exits as soon as a terminal
        // status is seen instead of sleeping one extra interval afterwards.
        await sleep(pollIntervalMs);
        response = await this.scrapegraphai.getSmartScraperStatus({
          $,
          requestId,
        });
      }
    }

    if (response.status !== "failed") {
      const verb = this.waitForCompletion
        ? "completed"
        : "started";
      $.export("$summary", `Successfully ${verb} scraping ${this.url}.`);
    }
    return response;
  },
};

components/scrapegraphai/package.json

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@pipedream/scrapegraphai",
3-
"version": "0.0.1",
3+
"version": "0.1.0",
44
"description": "Pipedream ScrapeGraphAI Components",
55
"main": "scrapegraphai.app.mjs",
66
"keywords": [
@@ -11,5 +11,8 @@
1111
"author": "Pipedream <[email protected]> (https://pipedream.com/)",
1212
"publishConfig": {
1313
"access": "public"
14+
},
15+
"dependencies": {
16+
"@pipedream/platform": "^3.0.3"
1417
}
15-
}
18+
}
Lines changed: 80 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,87 @@
1+
import { axios } from "@pipedream/platform";
2+
13
/**
 * Pipedream app definition for ScrapeGraphAI: shared prop definitions plus
 * thin request helpers for the smartscraper, localscraper and markdownify
 * endpoints.
 */
export default {
  type: "app",
  app: "scrapegraphai",
  propDefinitions: {
    url: {
      type: "string",
      label: "URL to Scrape",
      description: "The URL of the website to scrape.",
    },
    prompt: {
      type: "string",
      label: "Prompt",
      description: "A prompt describing what you want to extract. Example: `Extract info about the company`",
    },
    waitForCompletion: {
      type: "boolean",
      label: "Wait For Completion",
      description: "Set to `true` to poll the API in 3-second intervals until the request is completed",
      optional: true,
    },
  },
  methods: {
    /** Returns the root URL every request path is appended to. */
    _baseUrl() {
      return "https://api.scrapegraphai.com/v1";
    },
    /**
     * Sends a request to the API with the connected account's key attached.
     *
     * @param {object} args
     * @param {object} [args.$=this] - Context passed as axios' first argument.
     * @param {string} args.path - Path appended to the base URL.
     * @param {object} [args.headers] - Extra headers, merged with the auth
     *   header instead of replacing it.
     * @param {object} [args.opts] - Remaining axios options (method, data, ...).
     * @returns {Promise<object>} Whatever the platform axios helper resolves to.
     */
    _makeRequest({
      $ = this,
      path,
      headers,
      ...opts
    }) {
      return axios($, {
        url: `${this._baseUrl()}${path}`,
        headers: {
          "sgai-apikey": `${this.$auth.api_key}`,
          // Caller headers are merged last: an explicit override still wins,
          // but passing unrelated headers no longer drops the API key.
          ...headers,
        },
        ...opts,
      });
    },
    /** POSTs to `/smartscraper`; `opts` is forwarded to `_makeRequest`. */
    startSmartScraper(opts = {}) {
      return this._makeRequest({
        method: "POST",
        path: "/smartscraper",
        ...opts,
      });
    },
    /** Requests `/smartscraper/{requestId}`; `opts` is forwarded to `_makeRequest`. */
    getSmartScraperStatus({
      requestId, ...opts
    }) {
      return this._makeRequest({
        path: `/smartscraper/${requestId}`,
        ...opts,
      });
    },
    /** POSTs to `/localscraper`; `opts` is forwarded to `_makeRequest`. */
    startLocalScraper(opts = {}) {
      return this._makeRequest({
        method: "POST",
        path: "/localscraper",
        ...opts,
      });
    },
    /** Requests `/localscraper/{requestId}`; `opts` is forwarded to `_makeRequest`. */
    getLocalScraperStatus({
      requestId, ...opts
    }) {
      return this._makeRequest({
        path: `/localscraper/${requestId}`,
        ...opts,
      });
    },
    /** POSTs to `/markdownify`; `opts` is forwarded to `_makeRequest`. */
    startMarkdownify(opts = {}) {
      return this._makeRequest({
        method: "POST",
        path: "/markdownify",
        ...opts,
      });
    },
    /** Requests `/markdownify/{requestId}`; `opts` is forwarded to `_makeRequest`. */
    getMarkdownifyStatus({
      requestId, ...opts
    }) {
      return this._makeRequest({
        path: `/markdownify/${requestId}`,
        ...opts,
      });
    },
  },
};

pnpm-lock.yaml

Lines changed: 6 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)