Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions components/oxylabs/.gitignore

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
import oxylabs from "../../oxylabs.app.mjs";
import { ConfigurationError } from "@pipedream/platform";

/**
 * Action: establish a residential proxy session.
 *
 * Builds a proxy URL of the form
 * `http://customer-<username>[-cc-..][-city-..][-st-..][-sessid-..][-sstime-..]:<password>@pr.oxylabs.io:7777`
 * from the configured props and hands it to `this.oxylabs.createSession`.
 * Returns whatever `createSession` resolves with.
 *
 * Throws a ConfigurationError when `city` is provided without `cc`.
 */
export default {
  key: "oxylabs-create-proxy-session",
  name: "Create Proxy Session",
  description: "Establish a proxy session using the Residential Proxy endpoint. [See the documentation](https://developers.oxylabs.io/proxies/residential-proxies/session-control#establishing-session)",
  version: "0.0.1",
  type: "action",
  props: {
    oxylabs,
    username: {
      type: "string",
      label: "Username",
      description: "The username for the proxy user",
    },
    password: {
      type: "string",
      label: "Password",
      description: "The password for the proxy user",
      // Hide the credential in the UI instead of showing it as plain text.
      secret: true,
    },
    sessid: {
      type: "string",
      label: "Session ID",
      description: "Session ID to keep the same IP with upcoming queries. The session expires in 10 minutes. After that, a new IP address is assigned to that session ID. Random string, 0-9, and A-Z characters are supported.",
    },
    cc: {
      type: "string",
      label: "Country Code",
      description: "Case insensitive country code in 2-letter [3166-1 alpha-2 format](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2)",
      optional: true,
    },
    city: {
      type: "string",
      label: "City",
      description: "Case insensitive city name in English. This parameter must be accompanied by cc for better accuracy.",
      optional: true,
    },
    st: {
      type: "string",
      label: "State",
      description: "Case insensitive US state name with us_ in the beginning, for example, `us_california`, `us_illinois`",
      optional: true,
    },
    sstime: {
      type: "string",
      label: "Session Time",
      description: "Session time in minutes. The session time parameter keeps the same IP for a certain period. The maximum session time is 30 minutes.",
      optional: true,
    },
  },
  async run({ $ }) {
    const {
      username,
      password,
      sessid,
      cc,
      city,
      st,
      sstime,
    } = this;

    if (city && !cc) {
      throw new ConfigurationError("City must be accompanied by country code");
    }

    // Flags are appended to the proxy username in this fixed order; unset
    // (falsy) values are skipped entirely.
    const flags = [
      [
        "cc",
        cc,
      ],
      [
        "city",
        city,
      ],
      [
        "st",
        st,
      ],
      [
        "sessid",
        sessid,
      ],
      [
        "sstime",
        sstime,
      ],
    ]
      .filter(([
        , value,
      ]) => value)
      .map(([
        flag,
        value,
      ]) => `-${flag}-${value}`)
      .join("");

    // Percent-encode the user-info parts so reserved URL characters in the
    // credentials (e.g. `@`, `:`, `/`, `#`) cannot be mistaken for URL
    // delimiters. Letters, digits, `-` and `_` are left untouched, so ordinary
    // values produce exactly the same URL as plain interpolation.
    const proxyUser = encodeURIComponent(`customer-${username}${flags}`);
    const proxyPassword = encodeURIComponent(password);
    const proxyUrl = `http://${proxyUser}:${proxyPassword}@pr.oxylabs.io:7777`;

    const response = await this.oxylabs.createSession({
      $,
      proxyUrl,
    });
    $.export("$summary", `Successfully created proxy session with session ID: ${sessid}`);
    return response;
  },
};
44 changes: 44 additions & 0 deletions components/oxylabs/actions/create-schedule/create-schedule.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import oxylabs from "../../oxylabs.app.mjs";
import { parseObject } from "../../common/utils.mjs";

/**
 * Action: create a schedule for scraping jobs.
 *
 * Sends `cron`, `items` and `end_time` to `this.oxylabs.createSchedule` and
 * returns its response. Each entry of `items` is run through `parseObject`,
 * so job parameter sets may be supplied as JSON strings.
 */
export default {
  key: "oxylabs-create-schedule",
  name: "Create Schedule",
  description: "Create a schedule for a scraping job. [See the documentation](https://developers.oxylabs.io/scraping-solutions/web-scraper-api/features/scheduler#create-a-new-schedule)",
  version: "0.0.1",
  type: "action",
  props: {
    oxylabs,
    // NOTE: the prop key is spelled `chron`; it is sent to the API as `cron`.
    chron: {
      type: "string",
      label: "Cron Expression",
      description: "Cron schedule expression. It determines how often the submitted schedule will run. E.g. `0 3 * * 1`. Read more [here](https://crontab.guru/) and [here](https://docs.oracle.com/cd/E12058_01/doc/doc.1014/e12030/cron_expressions.htm).",
    },
    // Defined inline: the app declares no `items` propDefinition, so the prop
    // must not reference one.
    items: {
      type: "string[]",
      label: "Items",
      description: "List of Scraper APIs job parameter sets that should be executed as part of the schedule. E.g. `[{\"source\": \"universal\", \"url\": \"https://ip.oxylabs.io\"}]` [See the documentation](https://developers.oxylabs.io/scraping-solutions/web-scraper-api/features/scheduler#create-a-new-schedule) for more information.",
    },
    endTime: {
      type: "string",
      label: "End Time",
      description: "The time at which the schedule should stop running. E.g. `2032-12-21 12:34:45`",
    },
  },
  async run({ $ }) {
    const response = await this.oxylabs.createSchedule({
      $,
      data: {
        cron: this.chron,
        items: parseObject(this.items),
        end_time: this.endTime,
      },
    });
    $.export("$summary", `Successfully created schedule: ${response.schedule_id}`);
    return response;
  },
};
43 changes: 43 additions & 0 deletions components/oxylabs/actions/scrape-url/scrape-url.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import oxylabs from "../../oxylabs.app.mjs";
import constants from "../../common/constants.mjs";

/**
 * Action: scrape a single URL.
 *
 * Forwards the selected source, the target URL and the optional geo location
 * to `this.oxylabs.scrape` and returns its response unchanged.
 */
export default {
  key: "oxylabs-scrape-url",
  name: "Scrape URL",
  description: "Scrape a URL. [See the documentation](https://developers.oxylabs.io/scraping-solutions/web-scraper-api)",
  version: "0.0.1",
  type: "action",
  props: {
    oxylabs,
    source: {
      type: "string",
      label: "Source",
      description: "Sets the scraper that will be used to process your request",
      options: constants.URL_SOURCES,
      default: "universal",
    },
    url: {
      type: "string",
      label: "URL",
      description: "The URL to scrape",
    },
    geoLocation: {
      propDefinition: [
        oxylabs,
        "geoLocation",
      ],
    },
  },
  async run({ $ }) {
    const {
      source,
      url,
      geoLocation,
    } = this;

    // Request body; the camelCase prop is sent as snake_case `geo_location`.
    const data = {
      source,
      url,
      geo_location: geoLocation,
    };

    const result = await this.oxylabs.scrape({
      $,
      data,
    });
    $.export("$summary", `Successfully scraped URL: ${url}`);
    return result;
  },
};
42 changes: 42 additions & 0 deletions components/oxylabs/actions/scrape-with-query/scrape-with-query.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import oxylabs from "../../oxylabs.app.mjs";
import constants from "../../common/constants.mjs";

/**
 * Action: extract data using a search query.
 *
 * Forwards the selected source, the query and the optional geo location to
 * `this.oxylabs.scrape` and returns its response unchanged.
 */
export default {
  key: "oxylabs-scrape-with-query",
  name: "Scrape with Query",
  description: "Extract data using a search query. [See the documentation](https://developers.oxylabs.io/scraping-solutions/web-scraper-api)",
  version: "0.0.1",
  type: "action",
  props: {
    oxylabs,
    source: {
      type: "string",
      label: "Source",
      description: "Sets the scraper that will be used to process your request",
      options: constants.QUERY_SOURCES,
    },
    query: {
      type: "string",
      label: "Query",
      description: "The query to search for. [See the documentation](https://developers.oxylabs.io/scraping-solutions/web-scraper-api/targets) for more information about specific sources/targets",
    },
    geoLocation: {
      propDefinition: [
        oxylabs,
        "geoLocation",
      ],
    },
  },
  async run({ $ }) {
    const {
      source,
      query,
      geoLocation,
    } = this;

    // Request body; the camelCase prop is sent as snake_case `geo_location`.
    const data = {
      source,
      query,
      geo_location: geoLocation,
    };

    const result = await this.oxylabs.scrape({
      $,
      data,
    });
    $.export("$summary", `Successfully scraped using query: ${query}`);
    return result;
  },
};
13 changes: 0 additions & 13 deletions components/oxylabs/app/oxylabs.app.ts

This file was deleted.

37 changes: 37 additions & 0 deletions components/oxylabs/common/constants.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Builds `<prefix>_<suffix>` source identifiers for one provider.
const withPrefix = (prefix, suffixes) => suffixes.map((suffix) => `${prefix}_${suffix}`);

// Sources offered by the "Scrape URL" action.
const URL_SOURCES = [
  "universal",
  "amazon",
  "google",
  "bing",
  "kroger",
];

// Sources offered by the "Scrape with Query" action, grouped by provider.
// The resulting order is: amazon, google, bing, kroger, youtube.
const QUERY_SOURCES = [
  ...withPrefix("amazon", [
    "product",
    "search",
    "pricing",
    "sellers",
    "bestsellers",
    "reviews",
    "questions",
  ]),
  ...withPrefix("google", [
    "search",
    "ads",
    "images",
    "lens",
    "maps",
    "travel_hotels",
    "suggest",
    "trends_explore",
    "shopping_product",
    "shopping_search",
    "shopping_pricing",
  ]),
  ...withPrefix("bing", [
    "search",
  ]),
  ...withPrefix("kroger", [
    "product",
    "search",
  ]),
  ...withPrefix("youtube", [
    "transcript",
  ]),
];

export default {
  URL_SOURCES,
  QUERY_SOURCES,
};
25 changes: 25 additions & 0 deletions components/oxylabs/common/utils.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/**
 * Recursively converts a nested value: strings are JSON-parsed when possible
 * (and returned unchanged otherwise), arrays and plain objects are walked
 * element by element, and every other value is returned as-is.
 *
 * Unlike the top-level entry point, this never discards falsy values, so
 * nested `0`, `false`, `""` and `null` survive the conversion.
 *
 * @param {*} value - Any nested value.
 * @returns {*} The converted value.
 */
const parseNested = (value) => {
  if (typeof value === "string") {
    try {
      return JSON.parse(value);
    } catch (e) {
      // Not valid JSON: keep the raw string.
      return value;
    }
  }
  if (Array.isArray(value)) {
    return value.map((item) => parseNested(item));
  }
  // `typeof null === "object"`, so exclude null explicitly.
  if (value !== null && typeof value === "object") {
    return Object.fromEntries(Object.entries(value).map(([
      key,
      entry,
    ]) => [
      key,
      parseNested(entry),
    ]));
  }
  return value;
};

/**
 * Parses a prop value that may contain JSON-encoded strings.
 *
 * A falsy top-level input (unset prop, empty string, ...) yields `undefined`.
 * Otherwise strings are JSON-parsed when possible, and arrays / objects are
 * converted recursively without dropping falsy members.
 *
 * @param {*} obj - The raw value to parse.
 * @returns {*} The parsed value, or `undefined` for a falsy top-level input.
 */
export const parseObject = (obj) => {
  if (!obj) {
    return undefined;
  }
  return parseNested(obj);
};
82 changes: 82 additions & 0 deletions components/oxylabs/oxylabs.app.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
import { axios } from "@pipedream/platform";
import { HttpsProxyAgent } from "https-proxy-agent";

/**
 * Oxylabs app definition: shared prop definitions plus thin HTTP helpers
 * used by the Oxylabs actions and sources.
 */
export default {
  type: "app",
  app: "oxylabs",
  propDefinitions: {
    scheduleId: {
      type: "string",
      label: "Schedule ID",
      description: "The ID of the schedule to watch",
      // Options come from the `schedules` field of the list-schedules response;
      // an absent field yields an empty list.
      async options() {
        const { schedules } = await this.listSchedules();
        return schedules || [];
      },
    },
    geoLocation: {
      type: "string",
      label: "Geo Location",
      description: "The geo location to scrape from. E.g. `United States`",
      optional: true,
    },
  },
  methods: {
    /**
     * Base API URL, built from the `api_name` stored in the account auth.
     * @returns {string}
     */
    _getBaseUrl() {
      return `https://${this.$auth.api_name}.oxylabs.io/v1`;
    },
    /**
     * Sends a basic-auth JSON request to `path` under the base URL.
     * Any extra options are spread last, so they override the defaults
     * set here (including `headers` and `auth`).
     * @param {object} args - `$` (defaults to `this`), `path`, plus axios options.
     * @returns {Promise<*>} The result of the platform `axios` call.
     */
    _makeRequest({
      $ = this, path, ...opts
    }) {
      return axios($, {
        url: `${this._getBaseUrl()}${path}`,
        headers: {
          "Content-Type": "application/json",
        },
        auth: {
          username: `${this.$auth.username}`,
          password: `${this.$auth.password}`,
        },
        ...opts,
      });
    },
    /** POST `/queries` — submit a scraping job. */
    scrape(opts = {}) {
      return this._makeRequest({
        method: "POST",
        path: "/queries",
        ...opts,
      });
    },
    /** Request `/schedules` (no explicit method is set here). */
    listSchedules(opts = {}) {
      return this._makeRequest({
        path: "/schedules",
        ...opts,
      });
    },
    /**
     * Requests `https://ip.oxylabs.io/location` through the given proxy.
     * This call bypasses `_makeRequest`, so no API basic-auth or JSON header
     * is attached; the only credentials are those embedded in `proxyUrl`.
     * @param {object} args - `$` (defaults to `this`), `proxyUrl`, plus axios options.
     * @returns {Promise<*>} The result of the platform `axios` call.
     */
    async createSession({
      $ = this, proxyUrl, ...opts
    }) {
      const agent = new HttpsProxyAgent(proxyUrl);
      return axios($, {
        url: "https://ip.oxylabs.io/location",
        httpsAgent: agent,
        ...opts,
      });
    },
    /** POST `/schedules` — create a schedule. */
    createSchedule(opts = {}) {
      return this._makeRequest({
        method: "POST",
        path: "/schedules",
        ...opts,
      });
    },
    /** Request `/schedules/{scheduleId}/runs` for the given schedule. */
    getRunsInfo({
      scheduleId, ...opts
    }) {
      return this._makeRequest({
        path: `/schedules/${scheduleId}/runs`,
        ...opts,
      });
    },
  },
};
Loading
Loading