Skip to content

Commit ea500e6

Browse files
committed
webscraping_ai init
1 parent a57dd3d commit ea500e6

File tree

4 files changed

+233
-0
lines changed

4 files changed

+233
-0
lines changed
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
import webscraping_ai from "../../webscraping_ai.app.mjs";
2+
import { axios } from "@pipedream/platform";
3+
4+
export default {
  key: "webscraping_ai-ask-question",
  name: "Ask Question about Webpage",
  description: "Gets an answer to a question about a given webpage. [See the documentation](https://webscraping.ai/docs)",
  version: "0.0.{{ts}}",
  type: "action",
  props: {
    webscraping_ai,
    targetUrl: {
      propDefinition: [
        webscraping_ai,
        "targetUrl",
      ],
    },
    question: {
      propDefinition: [
        webscraping_ai,
        "question",
      ],
    },
  },
  /**
   * Sends the configured URL and question to the API and exports the answer.
   *
   * @param {object} ctx - Pipedream run context.
   * @param {object} ctx.$ - Step object used for exports and request logging.
   * @returns {Promise<object>} The response returned by the app's request helper.
   */
  async run({ $ }) {
    // `targetUrl` and `question` are props of this action, so they only exist
    // on the action's `this`. They are placed in the request payload here
    // instead of being read inside an app method, where they are undefined.
    // NOTE(review): path "/answer" and the POST body shape are carried over
    // unchanged; confirm them against https://webscraping.ai/docs.
    const response = await this.webscraping_ai._makeRequest({
      $,
      method: "POST",
      path: "/answer",
      data: {
        url: this.targetUrl,
        question: this.question,
      },
    });
    $.export("$summary", `Answer: ${response.answer}`);
    return response;
  },
};
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
import webscraping_ai from "../../webscraping_ai.app.mjs";
2+
import { axios } from "@pipedream/platform";
3+
4+
export default {
  key: "webscraping_ai-scrape-website-html",
  name: "Scrape Website HTML",
  description: "Starts a new web scraping job with specified configurations. [See the documentation](https://webscraping.ai/docs)",
  version: "0.0.{{ts}}",
  type: "action",
  props: {
    webscraping_ai,
    // propDefinition entries must reference the imported app object, not the
    // app's name as a string.
    targetUrl: {
      propDefinition: [
        webscraping_ai,
        "targetUrl",
      ],
    },
    selectors: {
      propDefinition: [
        webscraping_ai,
        "selectors",
      ],
      optional: true,
    },
    renderingMode: {
      propDefinition: [
        webscraping_ai,
        "renderingMode",
      ],
      optional: true,
    },
    headers: {
      propDefinition: [
        webscraping_ai,
        "headers",
      ],
      optional: true,
    },
  },
  /**
   * Builds the scraping request from this action's props and sends it.
   *
   * @param {object} ctx - Pipedream run context.
   * @param {object} ctx.$ - Step object used for exports and request logging.
   * @returns {Promise<object>} The response returned by the app's request helper.
   */
  async run({ $ }) {
    // The payload is assembled here because these values are props of the
    // action; they are not available on the app's `this`.
    const data = {
      url: this.targetUrl,
    };
    if (this.selectors) data.selectors = this.selectors;
    if (this.renderingMode) data.rendering_mode = this.renderingMode;
    if (this.headers) {
      // Each entry is expected to be a JSON object string such as
      // '{"Accept-Language": "en"}'. Entries that fail to parse are skipped
      // (best-effort, as before). Values that parse to something other than a
      // plain object are skipped too, since spreading a string or array would
      // add bogus index keys ("0", "1", ...) to the headers.
      const entries = Array.isArray(this.headers)
        ? this.headers
        : [
          this.headers,
        ];
      const merged = {};
      for (const entry of entries) {
        let parsed = entry;
        if (typeof entry === "string") {
          try {
            parsed = JSON.parse(entry);
          } catch (err) {
            continue;
          }
        }
        if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
          Object.assign(merged, parsed);
        }
      }
      data.headers = merged;
    }
    // NOTE(review): path "/scraping-jobs", the POST method and the field names
    // `selectors` / `rendering_mode` are carried over unchanged; confirm them
    // against https://webscraping.ai/docs.
    const response = await this.webscraping_ai._makeRequest({
      $,
      method: "POST",
      path: "/scraping-jobs",
      data,
    });
    $.export("$summary", `Started scraping job for URL ${this.targetUrl}`);
    return response;
  },
};
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
import webscraping_ai from "../../webscraping_ai.app.mjs";
2+
import { axios } from "@pipedream/platform";
3+
4+
export default {
  key: "webscraping_ai-scrape-website-text",
  name: "Scrape Website Text",
  description: "Returns the visible text content of a webpage specified by the URL. [See the documentation](https://webscraping.ai/docs)",
  version: "0.0.{{ts}}",
  type: "action",
  props: {
    // Use the imported app object as the app prop so that `this.webscraping_ai`
    // exposes the app's methods and matches the object referenced by the
    // propDefinitions below.
    webscraping_ai,
    targetUrl: {
      propDefinition: [
        webscraping_ai,
        "targetUrl",
      ],
    },
    textFormat: {
      propDefinition: [
        webscraping_ai,
        "textFormat",
      ],
      optional: true,
    },
    returnLinks: {
      propDefinition: [
        webscraping_ai,
        "returnLinks",
      ],
      optional: true,
    },
  },
  /**
   * Requests the text content of the configured URL.
   *
   * @param {object} ctx - Pipedream run context.
   * @param {object} ctx.$ - Step object used for exports and request logging.
   * @returns {Promise<object>} The response returned by the app's request helper.
   */
  async run({ $ }) {
    // Query parameters are assembled here because these values are props of
    // the action; they are not available on the app's `this`.
    const params = {
      url: this.targetUrl,
    };
    if (this.textFormat) params.text_format = this.textFormat;
    // Compare with undefined so an explicit `false` is still sent.
    if (this.returnLinks !== undefined) params.return_links = this.returnLinks;
    // NOTE(review): path "/text-content" is carried over unchanged; confirm it
    // against https://webscraping.ai/docs.
    const response = await this.webscraping_ai._makeRequest({
      $,
      method: "GET",
      path: "/text-content",
      params,
    });
    $.export("$summary", `Successfully scraped text from ${this.targetUrl}`);
    return response;
  },
};
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
import { axios } from "@pipedream/platform";
2+
3+
export default {
  type: "app",
  app: "webscraping_ai",
  version: "0.0.{{ts}}",
  propDefinitions: {
    targetUrl: {
      type: "string",
      label: "Target URL",
      description: "The URL of the webpage to scrape.",
    },
    selectors: {
      type: "string[]",
      label: "Selectors",
      description: "Optional CSS selectors to target specific elements on the page.",
      optional: true,
    },
    // NOTE(review): the example values in this description ('light', 'dark')
    // are unverified; confirm the accepted values against the API docs.
    renderingMode: {
      type: "string",
      label: "Rendering Mode",
      description: "The mode to render the page (e.g., 'light', 'dark').",
      optional: true,
    },
    headers: {
      type: "string[]",
      label: "Headers",
      description: "Optional HTTP headers to include in the request, as JSON strings.",
      optional: true,
    },
    textFormat: {
      type: "string",
      label: "Text Format",
      description: "The format of the returned text content (e.g., 'plain', 'html').",
      optional: true,
    },
    returnLinks: {
      type: "boolean",
      label: "Return Links",
      description: "Whether to include links in the returned text content.",
      optional: true,
    },
    question: {
      type: "string",
      label: "Question",
      description: "The question to ask about the given webpage.",
    },
  },
  methods: {
    /**
     * @returns {string} Root URL that every request path is appended to.
     */
    _baseUrl() {
      return "https://api.webscraping.ai";
    },
    /**
     * Sends an HTTP request through the Pipedream platform axios wrapper.
     *
     * @param {object} [opts] - Request options.
     * @param {object} [opts.$] - Step object to pass to axios; defaults to `this`.
     * @param {string} [opts.method="GET"] - HTTP method.
     * @param {string} [opts.path="/"] - Path appended to the base URL.
     * @param {object} [opts.headers] - Extra request headers.
     * @returns {Promise<*>} Whatever the platform axios wrapper resolves with.
     */
    async _makeRequest(opts = {}) {
      const {
        $ = this, method = "GET", path = "/", headers = {}, ...otherOpts
      } = opts;
      return axios($, {
        method,
        url: `${this._baseUrl()}${path}`,
        headers: {
          ...headers,
          // Listed after the caller's headers so these two always win.
          "User-Agent": "@PipedreamHQ/pipedream v0.1",
          // NOTE(review): Bearer auth is carried over unchanged; confirm the
          // expected authentication scheme against the API docs.
          "Authorization": `Bearer ${this.$auth.api_key}`,
        },
        ...otherOpts,
      });
    },
    /**
     * Merges a list of header entries into one headers object.
     *
     * Entries are JSON object strings (or already-parsed objects). Entries
     * that fail to parse, or that parse to anything other than a plain
     * object, are skipped: spreading a parsed string or array would add
     * bogus index keys ("0", "1", ...).
     *
     * @param {string[]|string|object} headers - Header entries.
     * @returns {object} Merged header name/value pairs.
     */
    _parseHeaders(headers) {
      const entries = Array.isArray(headers)
        ? headers
        : [
          headers,
        ];
      const merged = {};
      for (const entry of entries) {
        let parsed = entry;
        if (typeof entry === "string") {
          try {
            parsed = JSON.parse(entry);
          } catch (err) {
            // Best-effort: an unparseable entry is ignored.
            continue;
          }
        }
        if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
          Object.assign(merged, parsed);
        }
      }
      return merged;
    },
    /**
     * Posts a scraping request for a URL.
     *
     * Values are taken from the options object. Each one falls back to the
     * same-named property on `this`, so a call with no arguments behaves as
     * it did before options were accepted.
     *
     * @param {object} [opts] - Request inputs.
     * @param {object} [opts.$] - Step object forwarded to `_makeRequest`.
     * @param {string} [opts.targetUrl] - URL of the page to scrape.
     * @param {string[]} [opts.selectors] - CSS selectors to send.
     * @param {string} [opts.renderingMode] - Sent as `rendering_mode`.
     * @param {string[]} [opts.headers] - Header entries as JSON strings.
     * @returns {Promise<*>} The API response.
     */
    async startScrapingJob({
      $,
      targetUrl = this.targetUrl,
      selectors = this.selectors,
      renderingMode = this.renderingMode,
      headers = this.headers,
    } = {}) {
      const data = {
        url: targetUrl,
      };
      if (selectors) data.selectors = selectors;
      if (renderingMode) data.rendering_mode = renderingMode;
      if (headers) data.headers = this._parseHeaders(headers);
      // NOTE(review): path and method are carried over unchanged; confirm
      // them against the API docs.
      return this._makeRequest({
        $,
        method: "POST",
        path: "/scraping-jobs",
        data,
      });
    },
    /**
     * Requests the text content of a URL.
     *
     * Values are taken from the options object. Each one falls back to the
     * same-named property on `this`, so a call with no arguments behaves as
     * it did before options were accepted.
     *
     * @param {object} [opts] - Request inputs.
     * @param {object} [opts.$] - Step object forwarded to `_makeRequest`.
     * @param {string} [opts.targetUrl] - URL of the page to read.
     * @param {string} [opts.textFormat] - Sent as `text_format`.
     * @param {boolean} [opts.returnLinks] - Sent as `return_links`.
     * @returns {Promise<*>} The API response.
     */
    async getVisibleTextContent({
      $,
      targetUrl = this.targetUrl,
      textFormat = this.textFormat,
      returnLinks = this.returnLinks,
    } = {}) {
      const params = {
        url: targetUrl,
      };
      if (textFormat) params.text_format = textFormat;
      // Compare with undefined so an explicit `false` is still sent.
      if (returnLinks !== undefined) params.return_links = returnLinks;
      // NOTE(review): path is carried over unchanged; confirm it against the
      // API docs.
      return this._makeRequest({
        $,
        method: "GET",
        path: "/text-content",
        params,
      });
    },
    /**
     * Posts a question about a URL.
     *
     * Values are taken from the options object. Each one falls back to the
     * same-named property on `this`, so a call with no arguments behaves as
     * it did before options were accepted.
     *
     * @param {object} [opts] - Request inputs.
     * @param {object} [opts.$] - Step object forwarded to `_makeRequest`.
     * @param {string} [opts.targetUrl] - URL of the page the question is about.
     * @param {string} [opts.question] - Question text.
     * @returns {Promise<*>} The API response.
     */
    async getAnswerToQuestion({
      $,
      targetUrl = this.targetUrl,
      question = this.question,
    } = {}) {
      const data = {
        url: targetUrl,
        question,
      };
      // NOTE(review): path and method are carried over unchanged; confirm
      // them against the API docs.
      return this._makeRequest({
        $,
        method: "POST",
        path: "/answer",
        data,
      });
    },
  },
};

0 commit comments

Comments
 (0)