Skip to content

Commit 884ec80

Browse files
authored
New Components - oxylabs (#17218)
* wip * wip * new source * pnpm-lock.yaml * update package.json * pnpm-lock.yaml * update * update
1 parent a4b09fb commit 884ec80

File tree

13 files changed

+460
-29
lines changed

13 files changed

+460
-29
lines changed

components/oxylabs/.gitignore

Lines changed: 0 additions & 3 deletions
This file was deleted.
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
import oxylabs from "../../oxylabs.app.mjs";
2+
import { ConfigurationError } from "@pipedream/platform";
3+
4+
/**
 * Pipedream action: establishes an Oxylabs Residential Proxy session.
 *
 * The session parameters (country, city, state, session ID, session time) are
 * encoded as `-flag-value` suffixes on the proxy username, and the resulting
 * proxy URL is handed to the app's `createSession` method.
 */
export default {
  key: "oxylabs-create-proxy-session",
  name: "Create Proxy Session",
  description: "Establish a proxy session using the Residential Proxy endpoint. [See the documentation](https://developers.oxylabs.io/proxies/residential-proxies/session-control#establishing-session)",
  version: "0.0.1",
  type: "action",
  props: {
    oxylabs,
    username: {
      type: "string",
      label: "Username",
      description: "The username for the proxy user",
    },
    password: {
      type: "string",
      label: "Password",
      description: "The password for the proxy user",
      // Credentials should not be displayed in clear text in the UI.
      secret: true,
    },
    sessid: {
      type: "string",
      label: "Session ID",
      description: "Session ID to keep the same IP with upcoming queries. The session expires in 10 minutes. After that, a new IP address is assigned to that session ID. Random string, 0-9, and A-Z characters are supported.",
    },
    cc: {
      type: "string",
      label: "Country Code",
      description: "Case insensitive country code in 2-letter [3166-1 alpha-2 format](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2)",
      optional: true,
    },
    city: {
      type: "string",
      label: "City",
      description: "Case insensitive city name in English. This parameter must be accompanied by cc for better accuracy.",
      optional: true,
    },
    st: {
      type: "string",
      label: "State",
      description: "Case insensitive US state name with us_ in the beginning, for example, `us_california`, `us_illinois`",
      optional: true,
    },
    sstime: {
      type: "string",
      label: "Session Time",
      description: "Session time in minutes. The session time parameter keeps the same IP for a certain period. The maximum session time is 30 minutes.",
      optional: true,
    },
  },
  /**
   * Builds the proxy URL from the configured props and opens the session.
   *
   * @param {object} ctx - Pipedream run context; `ctx.$` is forwarded to the app
   *   and used to export the step summary.
   * @returns {Promise<*>} Whatever `this.oxylabs.createSession` resolves to.
   * @throws {ConfigurationError} When `city` is set without `cc`.
   */
  async run({ $ }) {
    const {
      username,
      password,
      sessid,
      cc,
      city,
      st,
      sstime,
    } = this;

    if (city && !cc) {
      throw new ConfigurationError("City must be accompanied by country code");
    }

    // Optional session flags, in the order they are appended to the username.
    // Unset (falsy) values are skipped entirely.
    const sessionFlags = [
      [
        "cc",
        cc,
      ],
      [
        "city",
        city,
      ],
      [
        "st",
        st,
      ],
      [
        "sessid",
        sessid,
      ],
      [
        "sstime",
        sstime,
      ],
    ]
      .filter(([
        , value,
      ]) => value)
      .map(([
        flag,
        value,
      ]) => `-${flag}-${value}`)
      .join("");

    // Percent-encode the userinfo parts so characters such as `@`, `:`, `/`
    // or `#` in the credentials cannot corrupt the structure of the URL.
    const proxyUser = encodeURIComponent(`customer-${username}${sessionFlags}`);
    const proxyPassword = encodeURIComponent(password);
    const proxyUrl = `http://${proxyUser}:${proxyPassword}@pr.oxylabs.io:7777`;

    const response = await this.oxylabs.createSession({
      $,
      proxyUrl,
    });
    $.export("$summary", `Successfully created proxy session with session ID: ${sessid}`);
    return response;
  },
};
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
import oxylabs from "../../oxylabs.app.mjs";
2+
import { parseObject } from "../../common/utils.mjs";
3+
4+
/**
 * Pipedream action: creates a schedule for scraping jobs through the app's
 * `createSchedule` method.
 */
export default {
  key: "oxylabs-create-schedule",
  name: "Create Schedule",
  description: "Create a schedule for a scraping job. [See the documentation](https://developers.oxylabs.io/scraping-solutions/web-scraper-api/features/scheduler#create-a-new-schedule)",
  version: "0.0.1",
  type: "action",
  props: {
    oxylabs,
    // NOTE: the prop key is spelled `chron`; it is sent to the API as `cron`.
    chron: {
      type: "string",
      label: "Cron Expression",
      description: "Cron schedule expression. It determines how often the submitted schedule will run. E.g. `0 3 * * 1`. Read more [here](https://crontab.guru/) and [here](https://docs.oracle.com/cd/E12058_01/doc/doc.1014/e12030/cron_expressions.htm).",
    },
    // Fully defined inline. The previous `propDefinition: [oxylabs, "items"]`
    // pointed at a definition the app does not declare, so it was removed.
    items: {
      type: "string[]",
      label: "Items",
      description: "List of Scraper APIs job parameter sets that should be executed as part of the schedule. E.g. `[{\"source\": \"universal\", \"url\": \"https://ip.oxylabs.io\"}]` [See the documentation](https://developers.oxylabs.io/scraping-solutions/web-scraper-api/features/scheduler#create-a-new-schedule) for more information.",
    },
    endTime: {
      type: "string",
      label: "End Time",
      description: "The time at which the schedule should stop running. E.g. `2032-12-21 12:34:45`",
    },
  },
  /**
   * Sends the schedule to the API and exports a summary with the new ID.
   *
   * @param {object} ctx - Pipedream run context; `ctx.$` is forwarded to the app
   *   and used to export the step summary.
   * @returns {Promise<*>} The response of `this.oxylabs.createSchedule`; its
   *   `schedule_id` field is read for the summary.
   */
  async run({ $ }) {
    const response = await this.oxylabs.createSchedule({
      $,
      data: {
        cron: this.chron,
        // Each entry may be a JSON string; parseObject turns those into objects.
        items: parseObject(this.items),
        end_time: this.endTime,
      },
    });
    $.export("$summary", `Successfully created schedule: ${response.schedule_id}`);
    return response;
  },
};
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
import oxylabs from "../../oxylabs.app.mjs";
2+
import constants from "../../common/constants.mjs";
3+
4+
/**
 * Pipedream action: scrapes a single URL by posting `source`, `url` and
 * `geo_location` through the app's `scrape` method.
 */
export default {
  key: "oxylabs-scrape-url",
  name: "Scrape URL",
  description: "Scrape a URL. [See the documentation](https://developers.oxylabs.io/scraping-solutions/web-scraper-api)",
  version: "0.0.1",
  type: "action",
  props: {
    oxylabs,
    source: {
      type: "string",
      label: "Source",
      description: "Sets the scraper that will be used to process your request",
      options: constants.URL_SOURCES,
      default: "universal",
    },
    url: {
      type: "string",
      label: "URL",
      description: "The URL to scrape",
    },
    geoLocation: {
      propDefinition: [
        oxylabs,
        "geoLocation",
      ],
    },
  },
  /**
   * @param {object} ctx - Pipedream run context; `ctx.$` is forwarded to the app
   *   and used to export the step summary.
   * @returns {Promise<*>} The response of `this.oxylabs.scrape`.
   */
  async run({ $ }) {
    const {
      oxylabs: app,
      source,
      url,
      geoLocation,
    } = this;

    // Request body; the camelCase prop is mapped to the snake_case field name.
    const data = {
      source,
      url,
      geo_location: geoLocation,
    };

    const result = await app.scrape({
      $,
      data,
    });
    $.export("$summary", `Successfully scraped URL: ${url}`);
    return result;
  },
};
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
import oxylabs from "../../oxylabs.app.mjs";
2+
import constants from "../../common/constants.mjs";
3+
4+
/**
 * Pipedream action: runs a query-based scrape by posting `source`, `query`
 * and `geo_location` through the app's `scrape` method.
 */
export default {
  key: "oxylabs-scrape-with-query",
  name: "Scrape with Query",
  description: "Extract data using a search query. [See the documentation](https://developers.oxylabs.io/scraping-solutions/web-scraper-api)",
  version: "0.0.1",
  type: "action",
  props: {
    oxylabs,
    source: {
      type: "string",
      label: "Source",
      description: "Sets the scraper that will be used to process your request",
      options: constants.QUERY_SOURCES,
    },
    query: {
      type: "string",
      label: "Query",
      description: "The query to search for. [See the documentation](https://developers.oxylabs.io/scraping-solutions/web-scraper-api/targets) for more information about specific sources/targets",
    },
    geoLocation: {
      propDefinition: [
        oxylabs,
        "geoLocation",
      ],
    },
  },
  /**
   * @param {object} ctx - Pipedream run context; `ctx.$` is forwarded to the app
   *   and used to export the step summary.
   * @returns {Promise<*>} The response of `this.oxylabs.scrape`.
   */
  async run({ $ }) {
    const {
      oxylabs: app,
      source,
      query,
      geoLocation,
    } = this;

    // Request body; the camelCase prop is mapped to the snake_case field name.
    const data = {
      source,
      query,
      geo_location: geoLocation,
    };

    const result = await app.scrape({
      $,
      data,
    });
    $.export("$summary", `Successfully scraped using query: ${query}`);
    return result;
  },
};

components/oxylabs/app/oxylabs.app.ts

Lines changed: 0 additions & 13 deletions
This file was deleted.
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
// Option list for the "Source" prop of the URL-based scrape action.
const URL_SOURCES = [
  "universal",
  "amazon",
  "google",
  "bing",
  "kroger",
];

// Query-based sources, grouped by target site. The groups are concatenated
// below in this order, which fixes the order of the resulting option list.
const AMAZON_QUERY_SOURCES = [
  "product",
  "search",
  "pricing",
  "sellers",
  "bestsellers",
  "reviews",
  "questions",
].map((suffix) => `amazon_${suffix}`);

const GOOGLE_QUERY_SOURCES = [
  "search",
  "ads",
  "images",
  "lens",
  "maps",
  "travel_hotels",
  "suggest",
  "trends_explore",
  "shopping_product",
  "shopping_search",
  "shopping_pricing",
].map((suffix) => `google_${suffix}`);

const KROGER_QUERY_SOURCES = [
  "product",
  "search",
].map((suffix) => `kroger_${suffix}`);

// Option list for the "Source" prop of the query-based scrape action.
const QUERY_SOURCES = [
  ...AMAZON_QUERY_SOURCES,
  ...GOOGLE_QUERY_SOURCES,
  "bing_search",
  ...KROGER_QUERY_SOURCES,
  "youtube_transcript",
];

export default {
  URL_SOURCES,
  QUERY_SOURCES,
};
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
/**
 * Recursively converts JSON-encoded strings into their parsed values.
 *
 * - A falsy top-level argument (`undefined`, `null`, `""`, `0`, `false`)
 *   yields `undefined`, i.e. "nothing was provided".
 * - A string is passed to `JSON.parse`; if parsing fails the original string
 *   is returned unchanged.
 * - Arrays and plain objects are walked, and each element / property value is
 *   processed the same way.
 * - Any other value is returned as is.
 *
 * Nested falsy values (`false`, `0`, `null`, `""`) are preserved. Only the
 * top-level argument is subject to the falsy -> `undefined` rule; applying it
 * recursively would silently erase fields such as `{ render: false }`.
 *
 * @param {*} obj - Value to normalise.
 * @returns {*} The parsed value, or `undefined` for a falsy top-level input.
 */
export const parseObject = (obj) => {
  if (!obj) {
    return undefined;
  }

  // Same conversion as above but without the falsy short-circuit, so it is
  // safe to apply to nested elements and property values.
  const parseValue = (value) => {
    if (typeof value === "string") {
      try {
        return JSON.parse(value);
      } catch {
        // Not JSON: keep the raw string.
        return value;
      }
    }
    if (Array.isArray(value)) {
      return value.map((item) => parseValue(item));
    }
    // `typeof null` is "object", so exclude it explicitly.
    if (value !== null && typeof value === "object") {
      return Object.fromEntries(Object.entries(value).map(([
        key,
        entry,
      ]) => [
        key,
        parseValue(entry),
      ]));
    }
    return value;
  };

  return parseValue(obj);
};

components/oxylabs/oxylabs.app.mjs

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
import { axios } from "@pipedream/platform";
2+
import { HttpsProxyAgent } from "https-proxy-agent";
3+
4+
/**
 * Pipedream app definition for Oxylabs: shared prop definitions plus thin
 * HTTP helpers built on the platform `axios` wrapper.
 */
export default {
  type: "app",
  app: "oxylabs",
  propDefinitions: {
    scheduleId: {
      type: "string",
      label: "Schedule ID",
      description: "The ID of the schedule to watch",
      // Options are the `schedules` field of the list response, or an empty
      // list when that field is missing/falsy.
      async options() {
        const listing = await this.listSchedules();
        return listing.schedules || [];
      },
    },
    geoLocation: {
      type: "string",
      label: "Geo Location",
      description: "The geo location to scrape from. [See the guide](https://developers.oxylabs.io/scraping-solutions/web-scraper-api/features/localization/e-commerce-localization) for using this property.",
      optional: true,
    },
  },
  methods: {
    /**
     * @returns {string} Base API URL; the subdomain comes from `$auth.api_name`.
     */
    _getBaseUrl() {
      const { api_name: apiName } = this.$auth;
      return `https://${apiName}.oxylabs.io/v1`;
    },
    /**
     * Issues an authenticated JSON request against the base URL.
     *
     * @param {object} args
     * @param {object} [args.$=this] - Context passed as first argument to `axios`.
     * @param {string} args.path - Path appended to the base URL.
     * @param {...*} args.requestConfig - Remaining keys are spread last into the
     *   axios config, so they override `url`, `headers` and `auth` if present.
     * @returns {Promise<*>} The result of the `axios` call.
     */
    _makeRequest({
      $ = this, path, ...requestConfig
    }) {
      const {
        username,
        password,
      } = this.$auth;
      const config = {
        url: `${this._getBaseUrl()}${path}`,
        headers: {
          "Content-Type": "application/json",
        },
        auth: {
          username: `${username}`,
          password: `${password}`,
        },
        ...requestConfig,
      };
      return axios($, config);
    },
    /** POST `/queries`. */
    scrape(args = {}) {
      const request = {
        method: "POST",
        path: "/queries",
        ...args,
      };
      return this._makeRequest(request);
    },
    /** Request `/schedules` (no explicit method is set). */
    listSchedules(args = {}) {
      const request = {
        path: "/schedules",
        ...args,
      };
      return this._makeRequest(request);
    },
    /**
     * Requests `https://ip.oxylabs.io/location` through the given proxy.
     *
     * @param {object} args
     * @param {object} [args.$=this] - Context passed as first argument to `axios`.
     * @param {string} args.proxyUrl - Proxy URL handed to `HttpsProxyAgent`.
     * @returns {Promise<*>} The result of the `axios` call.
     */
    async createSession({
      $ = this, proxyUrl, ...requestConfig
    }) {
      const httpsAgent = new HttpsProxyAgent(proxyUrl);
      const config = {
        url: "https://ip.oxylabs.io/location",
        httpsAgent,
        ...requestConfig,
      };
      return axios($, config);
    },
    /** POST `/schedules`. */
    createSchedule(args = {}) {
      const request = {
        method: "POST",
        path: "/schedules",
        ...args,
      };
      return this._makeRequest(request);
    },
    /**
     * Requests `/schedules/{scheduleId}/runs`.
     *
     * @param {object} args
     * @param {string} args.scheduleId - Interpolated into the request path.
     */
    getRunsInfo({
      scheduleId, ...requestConfig
    }) {
      const request = {
        path: `/schedules/${scheduleId}/runs`,
        ...requestConfig,
      };
      return this._makeRequest(request);
    },
  },
};

0 commit comments

Comments
 (0)