Skip to content

Commit 4a0c6aa

Browse files
authored
Merging pull request #17907
* new components * pnpm-lock.yaml
1 parent f04dbfb commit 4a0c6aa

File tree

6 files changed

+303
-7
lines changed

6 files changed

+303
-7
lines changed
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
import brightData from "../../bright_data.app.mjs";
2+
import { ConfigurationError } from "@pipedream/platform";
3+
4+
/**
 * Pipedream action: "Scrape SERP".
 *
 * Collects the user-supplied props, POSTs them to the app's
 * `requestWebsite` helper, and returns whatever that helper resolves with.
 * A response carrying `status_code === 400` is surfaced to the user as a
 * `ConfigurationError` built from the response `body`.
 */
export default {
  key: "bright_data-scrape-serp",
  name: "Scrape SERP",
  description: "Extract search engine results using Bright Data SERP API. [See the documentation](https://docs.brightdata.com/api-reference/rest-api/serp/scrape-serp)",
  version: "0.0.1",
  type: "action",
  props: {
    brightData,
    url: {
      propDefinition: [brightData, "url"],
      description: "Complete target URL to scrape. Must include protocol (http/https), be publicly accessible. Example: `https://www.google.com/search?q=pizza`",
    },
    zone: {
      // The third entry feeds `type: "serp"` to the app's zone options,
      // which uses it to filter the listed zones.
      propDefinition: [
        brightData,
        "zone",
        () => {
          return { type: "serp" };
        },
      ],
    },
    format: {
      propDefinition: [brightData, "format"],
    },
    method: {
      propDefinition: [brightData, "method"],
    },
    country: {
      propDefinition: [brightData, "country"],
    },
    dataFormat: {
      propDefinition: [brightData, "dataFormat"],
    },
  },
  /**
   * Executes the SERP request.
   *
   * @param {object} context - Pipedream run context; `$` is forwarded to the
   *   request helper and used to export the step summary.
   * @returns {Promise<*>} The value resolved by `brightData.requestWebsite`.
   * @throws {ConfigurationError} When the response has `status_code` 400.
   */
  async run({ $ }) {
    const {
      url,
      zone,
      format,
      method,
      country,
      dataFormat,
    } = this;

    // Request body; the API expects snake_case `data_format`.
    const payload = {
      url,
      zone,
      format,
      method,
      country,
      data_format: dataFormat,
    };

    const data = await this.brightData.requestWebsite({
      $,
      data: payload,
    });

    const isBadRequest = data.status_code === 400;
    if (isBadRequest) {
      throw new ConfigurationError(data.body);
    }

    $.export("$summary", `Scraped SERP for ${url}`);
    return data;
  },
};
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
import brightData from "../../bright_data.app.mjs";
2+
import { ConfigurationError } from "@pipedream/platform";
3+
4+
/**
 * Pipedream action: "Scrape Website".
 *
 * POSTs a single-URL input to the app's `scrapeWebsite` helper for the
 * selected dataset and returns the resolved response.
 */
export default {
  key: "bright_data-scrape-website",
  name: "Scrape Website",
  description: "Scrape a website and return the HTML. [See the documentation](https://docs.brightdata.com/api-reference/web-scraper-api/synchronous-requests)",
  version: "0.0.1",
  type: "action",
  props: {
    brightData,
    datasetId: {
      propDefinition: [
        brightData,
        "datasetId",
      ],
    },
    url: {
      propDefinition: [
        brightData,
        "url",
      ],
      description: "The URL of the website to scrape",
    },
  },
  /**
   * Executes the scrape request.
   *
   * @param {object} context - Pipedream run context; `$` is forwarded to the
   *   request helper and used to export the step summary.
   * @returns {Promise<*>} The value resolved by `brightData.scrapeWebsite`.
   * @throws {ConfigurationError} When the request fails and the error message
   *   is a JSON document with an `errors` array; entries are joined with
   *   " - " and the results with " | ".
   * @throws {Error} The original request error, unchanged, when its message
   *   is not in that shape (e.g. a network failure), so the real cause is not
   *   masked by a secondary parse error.
   */
  async run({ $ }) {
    let data;
    // Only the API call is guarded, so failures in the summary export are not
    // misreported as API validation errors.
    try {
      data = await this.brightData.scrapeWebsite({
        $,
        params: {
          dataset_id: this.datasetId,
        },
        data: {
          input: [
            {
              url: this.url,
            },
          ],
        },
      });
    } catch (error) {
      let errors;
      try {
        ({ errors } = JSON.parse(error.message));
      } catch {
        // Message is not the expected JSON payload; surface the original error.
        throw error;
      }
      if (!Array.isArray(errors)) {
        throw error;
      }
      const message = errors
        .map((entry) => (Array.isArray(entry)
          ? entry.join(" - ")
          : String(entry)))
        .join(" | ");
      throw new ConfigurationError(message);
    }

    $.export("$summary", `Scraped website ${this.url}`);
    return data;
  },
};
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
import brightData from "../../bright_data.app.mjs";
2+
3+
/**
 * Pipedream action: "Unlock Website".
 *
 * Collects the user-supplied props, POSTs them to the app's
 * `requestWebsite` helper, and returns whatever that helper resolves with.
 */
export default {
  key: "bright_data-unlock-website",
  name: "Unlock Website",
  description: "Send an API call to a URL and get the HTML back. Enables you to bypass anti-bot measures, manages proxies, and solves CAPTCHAs automatically for easier web data collection. [See the documentation](https://docs.brightdata.com/api-reference/rest-api/unlocker/unlock-website)",
  version: "0.0.1",
  type: "action",
  props: {
    brightData,
    url: {
      propDefinition: [brightData, "url"],
    },
    zone: {
      // The third entry feeds `type: "unblocker"` to the app's zone options,
      // which uses it to filter the listed zones.
      propDefinition: [
        brightData,
        "zone",
        () => {
          return { type: "unblocker" };
        },
      ],
    },
    format: {
      propDefinition: [brightData, "format"],
    },
    method: {
      propDefinition: [brightData, "method"],
    },
    country: {
      propDefinition: [brightData, "country"],
    },
    dataFormat: {
      propDefinition: [brightData, "dataFormat"],
    },
  },
  /**
   * Executes the unlock request.
   *
   * @param {object} context - Pipedream run context; `$` is forwarded to the
   *   request helper and used to export the step summary.
   * @returns {Promise<*>} The value resolved by `brightData.requestWebsite`.
   */
  async run({ $ }) {
    const {
      url,
      zone,
      format,
      method,
      country,
      dataFormat,
    } = this;

    // Request body; the API expects snake_case `data_format`.
    const payload = {
      url,
      zone,
      format,
      method,
      country,
      data_format: dataFormat,
    };

    const data = await this.brightData.requestWebsite({
      $,
      data: payload,
    });

    $.export("$summary", `Unlocked website ${url}`);
    return data;
  },
};
Lines changed: 104 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,111 @@
1+
import { axios } from "@pipedream/platform";
2+
13
/**
 * Bright Data app definition: shared prop definitions plus thin HTTP helpers
 * around `https://api.brightdata.com`, authenticated with the connected
 * account's `api_key` as a Bearer token.
 */
export default {
  type: "app",
  app: "bright_data",
  propDefinitions: {
    datasetId: {
      type: "string",
      label: "Dataset ID",
      description: "The ID of the dataset to use",
      // Options are built from `listDatasets()`: label = name, value = id.
      async options() {
        const datasets = await this.listDatasets();
        return datasets.map((dataset) => ({
          label: dataset.name,
          value: dataset.id,
        }));
      },
    },
    zone: {
      type: "string",
      label: "Zone",
      description: "Zone identifier that defines your Bright Data product configuration. Each zone contains targeting rules, output preferences, and access permissions. Manage zones at: https://brightdata.com/cp/zones",
      // `type` is supplied by the consuming action's propDefinition; only
      // zones whose `type` matches it are offered, by name.
      async options({ type }) {
        const zones = await this.listZones();
        return zones?.filter((zone) => zone.type === type)?.map(({ name }) => name) || [];
      },
    },
    url: {
      type: "string",
      label: "URL",
      description: "Complete target URL to scrape. Must include protocol (http/https), be publicly accessible.",
    },
    format: {
      type: "string",
      label: "Format",
      description: "Output format of the response",
      options: [
        "json",
        "raw",
      ],
    },
    method: {
      type: "string",
      label: "Method",
      description: "HTTP method to use for the request",
      options: [
        "GET",
        "POST",
      ],
      optional: true,
    },
    country: {
      type: "string",
      label: "Country",
      description: "Two-letter ISO 3166-1 country code for proxy location",
      optional: true,
    },
    dataFormat: {
      type: "string",
      label: "Data Format",
      description: "Additional response format transformation: `markdown` converts HTML content to clean markdown format, `screenshot` captures a PNG image of the rendered page.",
      options: [
        "markdown",
        "screenshot",
      ],
      optional: true,
    },
  },
  methods: {
    /**
     * @returns {string} Root URL every request path is appended to.
     */
    _baseUrl() {
      return "https://api.brightdata.com";
    },
    /**
     * Sends an authenticated request through the platform `axios` helper.
     *
     * @param {object} args
     * @param {object} [args.$=this] - Context passed as the first argument to `axios`.
     * @param {string} args.path - Path appended to `_baseUrl()`.
     * @param {object} [args.headers] - Extra headers, merged over the default
     *   `Authorization` header instead of replacing the whole headers object.
     * @param {...*} [args.opts] - Any other axios options (`method`, `params`,
     *   `data`, ...); these may override `url`.
     * @returns {Promise<*>} Whatever `axios` resolves with.
     */
    _makeRequest({
      $ = this, path, headers, ...opts
    }) {
      return axios($, {
        url: `${this._baseUrl()}${path}`,
        headers: {
          // Default first, so caller-supplied headers add to it (and can
          // still override it) rather than silently dropping the token.
          Authorization: `Bearer ${this.$auth.api_key}`,
          ...headers,
        },
        ...opts,
      });
    },
    /**
     * Calls the `/datasets/list` endpoint.
     *
     * @param {object} [opts={}] - Extra request options forwarded to `_makeRequest`.
     * @returns {Promise<*>} The response from `_makeRequest`.
     */
    listDatasets(opts = {}) {
      return this._makeRequest({
        path: "/datasets/list",
        ...opts,
      });
    },
    /**
     * Calls the `/zone/get_active_zones` endpoint.
     *
     * @param {object} [opts={}] - Extra request options forwarded to `_makeRequest`.
     * @returns {Promise<*>} The response from `_makeRequest`.
     */
    listZones(opts = {}) {
      return this._makeRequest({
        path: "/zone/get_active_zones",
        ...opts,
      });
    },
    /**
     * POSTs to `/datasets/v3/scrape`.
     *
     * @param {object} [opts={}] - Extra request options (e.g. `params`, `data`)
     *   forwarded to `_makeRequest`; may override `method`.
     * @returns {Promise<*>} The response from `_makeRequest`.
     */
    scrapeWebsite(opts = {}) {
      return this._makeRequest({
        path: "/datasets/v3/scrape",
        method: "POST",
        ...opts,
      });
    },
    /**
     * POSTs to `/request`.
     *
     * @param {object} [opts={}] - Extra request options (e.g. `data`)
     *   forwarded to `_makeRequest`; may override `method`.
     * @returns {Promise<*>} The response from `_makeRequest`.
     */
    requestWebsite(opts = {}) {
      return this._makeRequest({
        path: "/request",
        method: "POST",
        ...opts,
      });
    },
  },
};

components/bright_data/package.json

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@pipedream/bright_data",
3-
"version": "0.0.1",
3+
"version": "0.1.0",
44
"description": "Pipedream Bright Data Components",
55
"main": "bright_data.app.mjs",
66
"keywords": [
@@ -11,5 +11,8 @@
1111
"author": "Pipedream <support@pipedream.com> (https://pipedream.com/)",
1212
"publishConfig": {
1313
"access": "public"
14+
},
15+
"dependencies": {
16+
"@pipedream/platform": "^3.1.0"
1417
}
15-
}
18+
}

pnpm-lock.yaml

Lines changed: 5 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)