Skip to content

Commit 1ad1834

Browse files
committed
Parse Page Content
1 parent be9b680 commit 1ad1834

File tree

2 files changed

+72
-0
lines changed

2 files changed

+72
-0
lines changed
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
import { parseObjectEntries } from "../../common/utils.mjs";
2+
import dataforseo from "../../dataforseo.app.mjs";
3+
4+
/**
 * Pipedream action: parse the content of a single page through the
 * DataForSEO OnPage "content_parsing/live" endpoint and return the response.
 */
export default {
  key: "dataforseo-parse-page-content",
  name: "Parse Page Content",
  description:
    "Parse the content on any page and return its structured content. [See the documentation](https://docs.dataforseo.com/v3/on_page/content_parsing/live/)",
  version: "0.0.1",
  type: "action",
  methods: {
    /**
     * POST to `/on_page/content_parsing/live`.
     *
     * The request helper lives on the app, not on this component, so it is
     * reached through the `dataforseo` app prop rather than through `this`
     * directly.
     * NOTE(review): assumes `_makeRequest` is defined in dataforseo.app.mjs — confirm.
     *
     * @param {object} [args] - Extra request options (e.g. `$`, `data`);
     *   spread last, so they can override `path` and `method`.
     * @returns {*} Whatever the app's `_makeRequest` returns.
     */
    parsePageContent(args = {}) {
      return this.dataforseo._makeRequest({
        path: "/on_page/content_parsing/live",
        method: "post",
        ...args,
      });
    },
  },
  props: {
    dataforseo,
    url: {
      type: "string",
      label: "URL",
      description:
        "The URL of the page to parse, e.g. `https://pipedream.com/`",
    },
    customUserAgent: {
      type: "string",
      label: "Custom User Agent",
      description: "Custom user agent for crawling a website. Default is `Mozilla/5.0 (compatible; RSiteAuditor)`",
      optional: true,
    },
    storeRawHtml: {
      type: "boolean",
      label: "Store Raw HTML",
      // Markdown links are `[text](url)`; the original had the two parts swapped.
      description: "Set to `true` if you want to get the HTML of the page using the [OnPage Raw HTML endpoint](https://docs.dataforseo.com/v3/on_page/raw_html/)",
      optional: true,
    },
    enableJavascript: {
      type: "boolean",
      label: "Enable Javascript",
      description: "Set to `true` if you want to load the scripts available on a page",
      optional: true,
    },
    additionalOptions: {
      propDefinition: [
        dataforseo,
        "additionalOptions",
      ],
      description:
        "Additional parameters to send in the request. [See the documentation](https://docs.dataforseo.com/v3/on_page/content_parsing/live/) for all available parameters. Values will be parsed as JSON where applicable.",
    },
  },
  /**
   * Build a one-element task array from the props, send it, export a summary
   * and return the response.
   *
   * @param {object} context
   * @param {object} context.$ - Step context; forwarded to the request and
   *   used to export `$summary`.
   * @returns {Promise<*>} The response from `parsePageContent`.
   */
  async run({ $ }) {
    const response = await this.parsePageContent({
      $,
      data: [
        {
          url: this.url,
          custom_user_agent: this.customUserAgent,
          store_raw_html: this.storeRawHtml,
          enable_javascript: this.enableJavascript,
          // Spread last so user-supplied additional options override the
          // named props above when keys collide.
          ...parseObjectEntries(this.additionalOptions),
        },
      ],
    });
    $.export("$summary", "Successfully parsed page content");
    return response;
  },
};

components/dataforseo/dataforseo.app.mjs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ export default {
2525
label: "Location Coordinate",
2626
description:
2727
"The location to search, in the format `latitude,longitude,radius` where radius is specified in kilometers. Example: `53.476225,-2.243572,200`",
28+
2829
},
2930
targetType: {
3031
type: "string",

0 commit comments

Comments
 (0)