Skip to content

Commit 4402b45

Browse files
committed
new components
1 parent ea500e6 commit 4402b45

File tree

7 files changed

+76
-161
lines changed

7 files changed

+76
-161
lines changed

components/webscraping_ai/.gitignore

Lines changed: 0 additions & 3 deletions
This file was deleted.
Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,34 @@
1-
import webscraping_ai from "../../webscraping_ai.app.mjs";
2-
import { axios } from "@pipedream/platform";
1+
import webscrapingAI from "../../webscraping_ai.app.mjs";
32

43
export default {
54
key: "webscraping_ai-ask-question",
65
name: "Ask Question about Webpage",
7-
description: "Gets an answer to a question about a given webpage. [See the documentation](https://webscraping.ai/docs)",
8-
version: "0.0.{{ts}}",
6+
description: "Gets an answer to a question about a given webpage. [See the documentation](https://webscraping.ai/docs#tag/AI/operation/getQuestion)",
7+
version: "0.0.1",
98
type: "action",
109
props: {
11-
webscraping_ai,
10+
webscrapingAI,
1211
targetUrl: {
1312
propDefinition: [
14-
webscraping_ai,
13+
webscrapingAI,
1514
"targetUrl",
1615
],
1716
},
1817
question: {
19-
propDefinition: [
20-
webscraping_ai,
21-
"question",
22-
],
18+
type: "string",
19+
label: "Question",
20+
description: "The question to ask about the given webpage. E.g. `What is the summary of this page content?`",
2321
},
2422
},
2523
async run({ $ }) {
26-
const response = await this.webscraping_ai.getAnswerToQuestion();
27-
$.export("$summary", `Answer: ${response.answer}`);
24+
const response = await this.webscrapingAI.getAnswerToQuestion({
25+
$,
26+
params: {
27+
url: this.targetUrl,
28+
question: this.question,
29+
},
30+
});
31+
$.export("$summary", "Successfully retrieved answer to question");
2832
return response;
2933
},
3034
};
Lines changed: 13 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,45 +1,29 @@
1-
import webscraping_ai from "../../webscraping_ai.app.mjs";
2-
import { axios } from "@pipedream/platform";
1+
import webscrapingAI from "../../webscraping_ai.app.mjs";
32

43
export default {
54
key: "webscraping_ai-scrape-website-html",
65
name: "Scrape Website HTML",
7-
description: "Starts a new web scraping job with specified configurations. [See the documentation]():",
8-
version: "0.0.{{ts}}",
6+
description: "Returns the full HTML content of a webpage specified by the URL. [See the documentation](https://webscraping.ai/docs#tag/HTML/operation/getHTML):",
7+
version: "0.0.1",
98
type: "action",
109
props: {
11-
webscraping_ai,
10+
webscrapingAI,
1211
targetUrl: {
1312
propDefinition: [
14-
"webscraping_ai",
13+
webscrapingAI,
1514
"targetUrl",
1615
],
1716
},
18-
selectors: {
19-
propDefinition: [
20-
"webscraping_ai",
21-
"selectors",
22-
],
23-
optional: true,
24-
},
25-
renderingMode: {
26-
propDefinition: [
27-
"webscraping_ai",
28-
"renderingMode",
29-
],
30-
optional: true,
31-
},
32-
headers: {
33-
propDefinition: [
34-
"webscraping_ai",
35-
"headers",
36-
],
37-
optional: true,
38-
},
3917
},
4018
async run({ $ }) {
41-
const response = await this.webscraping_ai.startScrapingJob();
42-
$.export("$summary", `Started scraping job for URL ${this.targetUrl}`);
19+
const response = await this.webscrapingAI.pageHtmlByUrl({
20+
$,
21+
params: {
22+
url: this.targetUrl,
23+
format: "json",
24+
},
25+
});
26+
$.export("$summary", `Successfully scraped HTML of URL ${this.targetUrl}`);
4327
return response;
4428
},
4529
};

components/webscraping_ai/actions/scrape-website-text/scrape-website-text.mjs

Lines changed: 24 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,40 +1,47 @@
1-
import webscraping_ai from "../../webscraping_ai.app.mjs";
2-
import { axios } from "@pipedream/platform";
1+
import webscrapingAI from "../../webscraping_ai.app.mjs";
32

43
export default {
54
key: "webscraping_ai-scrape-website-text",
65
name: "Scrape Website Text",
7-
description: "Returns the visible text content of a webpage specified by the URL. [See the documentation]().",
8-
version: "0.0.{{ts}}",
6+
description: "Returns the visible text content of a webpage specified by the URL. [See the documentation](https://webscraping.ai/docs#tag/Text/operation/getText).",
7+
version: "0.0.1",
98
type: "action",
109
props: {
11-
webscraping_ai: {
12-
type: "app",
13-
app: "webscraping_ai",
14-
},
10+
webscrapingAI,
1511
targetUrl: {
1612
propDefinition: [
17-
webscraping_ai,
13+
webscrapingAI,
1814
"targetUrl",
1915
],
2016
},
2117
textFormat: {
22-
propDefinition: [
23-
webscraping_ai,
24-
"textFormat",
18+
type: "string",
19+
label: "Text Format",
20+
description: "The format of the returned text content. Default: `json`",
21+
options: [
22+
"plain",
23+
"xml",
24+
"json",
2525
],
26+
default: "json",
2627
optional: true,
2728
},
2829
returnLinks: {
29-
propDefinition: [
30-
webscraping_ai,
31-
"returnLinks",
32-
],
30+
type: "boolean",
31+
label: "Return Links",
32+
description: "Whether to include links in the returned text content. Works only when Text Format is `json`.",
3333
optional: true,
3434
},
3535
},
3636
async run({ $ }) {
37-
const response = await this.webscraping_ai.getVisibleTextContent();
37+
const response = await this.webscrapingAI.pageTextByUrl({
38+
$,
39+
params: {
40+
url: this.targetUrl,
41+
text_format: this.textFormat,
42+
return_links: this.returnLinks,
43+
},
44+
});
3845
$.export("$summary", `Successfully scraped text from ${this.targetUrl}`);
3946
return response;
4047
},

components/webscraping_ai/app/webscraping_ai.app.ts

Lines changed: 0 additions & 13 deletions
This file was deleted.
Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,18 @@
11
{
22
"name": "@pipedream/webscraping_ai",
3-
"version": "0.0.3",
3+
"version": "0.1.0",
44
"description": "Pipedream WebScraping.AI Components",
5-
"main": "dist/app/webscraping_ai.app.mjs",
5+
"main": "webscraping_ai.app.mjs",
66
"keywords": [
77
"pipedream",
88
"webscraping_ai"
99
],
10-
"files": ["dist"],
1110
"homepage": "https://pipedream.com/apps/webscraping_ai",
1211
"author": "Pipedream <support@pipedream.com> (https://pipedream.com/)",
1312
"publishConfig": {
1413
"access": "public"
14+
},
15+
"dependencies": {
16+
"@pipedream/platform": "^3.0.3"
1517
}
1618
}

components/webscraping_ai/webscraping_ai.app.mjs

Lines changed: 18 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -3,114 +3,48 @@ import { axios } from "@pipedream/platform";
33
export default {
44
type: "app",
55
app: "webscraping_ai",
6-
version: "0.0.{{ts}}",
76
propDefinitions: {
87
targetUrl: {
98
type: "string",
109
label: "Target URL",
1110
description: "The URL of the webpage to scrape.",
1211
},
13-
selectors: {
14-
type: "string[]",
15-
label: "Selectors",
16-
description: "Optional CSS selectors to target specific elements on the page.",
17-
optional: true,
18-
},
19-
renderingMode: {
20-
type: "string",
21-
label: "Rendering Mode",
22-
description: "The mode to render the page (e.g., 'light', 'dark').",
23-
optional: true,
24-
},
25-
headers: {
26-
type: "string[]",
27-
label: "Headers",
28-
description: "Optional HTTP headers to include in the request, as JSON strings.",
29-
optional: true,
30-
},
31-
textFormat: {
32-
type: "string",
33-
label: "Text Format",
34-
description: "The format of the returned text content (e.g., 'plain', 'html').",
35-
optional: true,
36-
},
37-
returnLinks: {
38-
type: "boolean",
39-
label: "Return Links",
40-
description: "Whether to include links in the returned text content.",
41-
optional: true,
42-
},
43-
question: {
44-
type: "string",
45-
label: "Question",
46-
description: "The question to ask about the given webpage.",
47-
},
4812
},
4913
methods: {
5014
_baseUrl() {
5115
return "https://api.webscraping.ai";
5216
},
53-
async _makeRequest(opts = {}) {
54-
const {
55-
$, method = "GET", path = "/", headers = {}, ...otherOpts
56-
} = opts;
17+
_makeRequest({
18+
$ = this,
19+
path,
20+
params,
21+
...otherOpts
22+
}) {
5723
return axios($, {
58-
method,
5924
url: `${this._baseUrl()}${path}`,
60-
headers: {
61-
...headers,
62-
"User-Agent": "@PipedreamHQ/pipedream v0.1",
63-
"Authorization": `Bearer ${this.$auth.api_key}`,
25+
params: {
26+
...params,
27+
api_key: this.$auth.api_key,
6428
},
6529
...otherOpts,
6630
});
6731
},
68-
async startScrapingJob() {
69-
const data = {
70-
url: this.targetUrl,
71-
};
72-
if (this.selectors) data.selectors = this.selectors;
73-
if (this.renderingMode) data.rendering_mode = this.renderingMode;
74-
if (this.headers) {
75-
data.headers = this.headers.reduce((acc, headerStr) => {
76-
try {
77-
const header = JSON.parse(headerStr);
78-
return {
79-
...acc,
80-
...header,
81-
};
82-
} catch (e) {
83-
return acc;
84-
}
85-
}, {});
86-
}
32+
pageHtmlByUrl(opts = {}) {
8733
return this._makeRequest({
88-
method: "POST",
89-
path: "/scraping-jobs",
90-
data,
34+
path: "/html",
35+
...opts,
9136
});
9237
},
93-
async getVisibleTextContent() {
94-
const params = {
95-
url: this.targetUrl,
96-
};
97-
if (this.textFormat) params.text_format = this.textFormat;
98-
if (this.returnLinks !== undefined) params.return_links = this.returnLinks;
38+
pageTextByUrl(opts = {}) {
9939
return this._makeRequest({
100-
method: "GET",
101-
path: "/text-content",
102-
params,
40+
path: "/text",
41+
...opts,
10342
});
10443
},
105-
async getAnswerToQuestion() {
106-
const data = {
107-
url: this.targetUrl,
108-
question: this.question,
109-
};
44+
getAnswerToQuestion(opts = {}) {
11045
return this._makeRequest({
111-
method: "POST",
112-
path: "/answer",
113-
data,
46+
path: "/ai/question",
47+
...opts,
11448
});
11549
},
11650
},

0 commit comments

Comments
 (0)