Skip to content

Commit c0b1155

Browse files
committed
add additional optional props
1 parent 85a1e77 commit c0b1155

File tree

5 files changed

+392
-1
lines changed

5 files changed

+392
-1
lines changed

components/webscraping_ai/actions/ask-question/ask-question.mjs

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import webscrapingAI from "../../webscraping_ai.app.mjs";
2+
import utils from "../../common/utils.mjs";
23

34
export default {
45
key: "webscraping_ai-ask-question",
@@ -19,13 +20,104 @@ export default {
1920
label: "Question",
2021
description: "The question to ask about the given webpage. E.g. `What is the summary of this page content?`",
2122
},
23+
headers: {
24+
propDefinition: [
25+
webscrapingAI,
26+
"headers",
27+
],
28+
},
29+
timeout: {
30+
propDefinition: [
31+
webscrapingAI,
32+
"timeout",
33+
],
34+
},
35+
js: {
36+
propDefinition: [
37+
webscrapingAI,
38+
"js",
39+
],
40+
},
41+
jsTimeout: {
42+
propDefinition: [
43+
webscrapingAI,
44+
"jsTimeout",
45+
],
46+
},
47+
waitFor: {
48+
propDefinition: [
49+
webscrapingAI,
50+
"waitFor",
51+
],
52+
},
53+
proxy: {
54+
propDefinition: [
55+
webscrapingAI,
56+
"proxy",
57+
],
58+
},
59+
country: {
60+
propDefinition: [
61+
webscrapingAI,
62+
"country",
63+
],
64+
},
65+
customProxy: {
66+
propDefinition: [
67+
webscrapingAI,
68+
"customProxy",
69+
],
70+
},
71+
device: {
72+
propDefinition: [
73+
webscrapingAI,
74+
"device",
75+
],
76+
},
77+
errorOn404: {
78+
propDefinition: [
79+
webscrapingAI,
80+
"errorOn404",
81+
],
82+
},
83+
errorOnRedirect: {
84+
propDefinition: [
85+
webscrapingAI,
86+
"errorOnRedirect",
87+
],
88+
},
89+
jsScript: {
90+
propDefinition: [
91+
webscrapingAI,
92+
"jsScript",
93+
],
94+
},
95+
format: {
96+
propDefinition: [
97+
webscrapingAI,
98+
"format",
99+
],
100+
},
22101
},
23102
async run({ $ }) {
24103
const response = await this.webscrapingAI.getAnswerToQuestion({
25104
$,
26105
params: {
27106
url: this.targetUrl,
28107
question: this.question,
108+
headers: utils.stringifyHeaders(this.headers),
109+
timeout: this.timeout,
110+
js: this.js,
111+
js_timeout: this.jsTimeout,
112+
wait_for: this.waitFor,
113+
proxy: this.proxy,
114+
country: this.country,
115+
custom_proxy: this.customProxy,
116+
device: this.device,
117+
error_on_404: this.errorOn404,
118+
error_on_redirect: this.errorOnRedirect,
119+
js_script: this.jsScript,
120+
format: this.format,
29121
},
30122
});
31123
$.export("$summary", "Successfully retrieved answer to question");

components/webscraping_ai/actions/scrape-website-html/scrape-website-html.mjs

Lines changed: 99 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import webscrapingAI from "../../webscraping_ai.app.mjs";
2+
import utils from "../../common/utils.mjs";
23

34
export default {
45
key: "webscraping_ai-scrape-website-html",
@@ -14,13 +15,110 @@ export default {
1415
"targetUrl",
1516
],
1617
},
18+
headers: {
19+
propDefinition: [
20+
webscrapingAI,
21+
"headers",
22+
],
23+
},
24+
timeout: {
25+
propDefinition: [
26+
webscrapingAI,
27+
"timeout",
28+
],
29+
},
30+
js: {
31+
propDefinition: [
32+
webscrapingAI,
33+
"js",
34+
],
35+
},
36+
jsTimeout: {
37+
propDefinition: [
38+
webscrapingAI,
39+
"jsTimeout",
40+
],
41+
},
42+
waitFor: {
43+
propDefinition: [
44+
webscrapingAI,
45+
"waitFor",
46+
],
47+
},
48+
proxy: {
49+
propDefinition: [
50+
webscrapingAI,
51+
"proxy",
52+
],
53+
},
54+
country: {
55+
propDefinition: [
56+
webscrapingAI,
57+
"country",
58+
],
59+
},
60+
customProxy: {
61+
propDefinition: [
62+
webscrapingAI,
63+
"customProxy",
64+
],
65+
},
66+
device: {
67+
propDefinition: [
68+
webscrapingAI,
69+
"device",
70+
],
71+
},
72+
errorOn404: {
73+
propDefinition: [
74+
webscrapingAI,
75+
"errorOn404",
76+
],
77+
},
78+
errorOnRedirect: {
79+
propDefinition: [
80+
webscrapingAI,
81+
"errorOnRedirect",
82+
],
83+
},
84+
jsScript: {
85+
propDefinition: [
86+
webscrapingAI,
87+
"jsScript",
88+
],
89+
},
90+
format: {
91+
propDefinition: [
92+
webscrapingAI,
93+
"format",
94+
],
95+
},
96+
returnScriptResult: {
97+
type: "boolean",
98+
label: "Return Script Result",
99+
description: "Return result of the custom JavaScript code (`js_script` parameter) execution on the target page (`false` by default, page HTML will be returned).",
100+
optional: true,
101+
},
17102
},
18103
async run({ $ }) {
19104
const response = await this.webscrapingAI.pageHtmlByUrl({
20105
$,
21106
params: {
22107
url: this.targetUrl,
23-
format: "json",
108+
headers: utils.stringifyHeaders(this.headers),
109+
timeout: this.timeout,
110+
js: this.js,
111+
js_timeout: this.jsTimeout,
112+
wait_for: this.waitFor,
113+
proxy: this.proxy,
114+
country: this.country,
115+
custom_proxy: this.customProxy,
116+
device: this.device,
117+
error_on_404: this.errorOn404,
118+
error_on_redirect: this.errorOnRedirect,
119+
js_script: this.jsScript,
120+
format: this.format,
121+
return_script_result: this.returnScriptResult,
24122
},
25123
});
26124
$.export("$summary", `Successfully scraped HTML of URL ${this.targetUrl}`);

components/webscraping_ai/actions/scrape-website-text/scrape-website-text.mjs

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import webscrapingAI from "../../webscraping_ai.app.mjs";
2+
import utils from "../../common/utils.mjs";
23

34
export default {
45
key: "webscraping_ai-scrape-website-text",
@@ -14,6 +15,78 @@ export default {
1415
"targetUrl",
1516
],
1617
},
18+
headers: {
19+
propDefinition: [
20+
webscrapingAI,
21+
"headers",
22+
],
23+
},
24+
timeout: {
25+
propDefinition: [
26+
webscrapingAI,
27+
"timeout",
28+
],
29+
},
30+
js: {
31+
propDefinition: [
32+
webscrapingAI,
33+
"js",
34+
],
35+
},
36+
jsTimeout: {
37+
propDefinition: [
38+
webscrapingAI,
39+
"jsTimeout",
40+
],
41+
},
42+
waitFor: {
43+
propDefinition: [
44+
webscrapingAI,
45+
"waitFor",
46+
],
47+
},
48+
proxy: {
49+
propDefinition: [
50+
webscrapingAI,
51+
"proxy",
52+
],
53+
},
54+
country: {
55+
propDefinition: [
56+
webscrapingAI,
57+
"country",
58+
],
59+
},
60+
customProxy: {
61+
propDefinition: [
62+
webscrapingAI,
63+
"customProxy",
64+
],
65+
},
66+
device: {
67+
propDefinition: [
68+
webscrapingAI,
69+
"device",
70+
],
71+
},
72+
errorOn404: {
73+
propDefinition: [
74+
webscrapingAI,
75+
"errorOn404",
76+
],
77+
},
78+
errorOnRedirect: {
79+
propDefinition: [
80+
webscrapingAI,
81+
"errorOnRedirect",
82+
],
83+
},
84+
jsScript: {
85+
propDefinition: [
86+
webscrapingAI,
87+
"jsScript",
88+
],
89+
},
1790
textFormat: {
1891
type: "string",
1992
label: "Text Format",
@@ -38,6 +111,18 @@ export default {
38111
$,
39112
params: {
40113
url: this.targetUrl,
114+
headers: utils.stringifyHeaders(this.headers),
115+
timeout: this.timeout,
116+
js: this.js,
117+
js_timeout: this.jsTimeout,
118+
wait_for: this.waitFor,
119+
proxy: this.proxy,
120+
country: this.country,
121+
custom_proxy: this.customProxy,
122+
device: this.device,
123+
error_on_404: this.errorOn404,
124+
error_on_redirect: this.errorOnRedirect,
125+
js_script: this.jsScript,
41126
text_format: this.textFormat,
42127
return_links: this.returnLinks,
43128
},
components/webscraping_ai/common/utils.mjs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
/**
 * Normalize the optional `headers` prop into a string.
 *
 * The actions in this commit pass the result as the `headers` request param,
 * so an object value has to be serialized before it is sent.
 *
 * @param {object|string} [headers] - Headers as an object, or a string that
 *   has already been serialized by the caller.
 * @returns {string|undefined} The string itself when a non-empty string is
 *   given, the JSON serialization for any other truthy value, or `undefined`
 *   when `headers` is falsy (missing, `null`, or an empty string).
 */
function stringifyHeaders(headers) {
  // Falsy input yields `undefined` rather than a serialized placeholder.
  if (!headers) {
    return undefined;
  }
  // Strings are passed through untouched to avoid double-encoding JSON.
  return typeof headers === "string"
    ? headers
    : JSON.stringify(headers);
}

export default {
  stringifyHeaders,
};

0 commit comments

Comments
 (0)