diff --git a/components/oxylabs/.gitignore b/components/oxylabs/.gitignore deleted file mode 100644 index ec761ccab7595..0000000000000 --- a/components/oxylabs/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -*.js -*.mjs -dist \ No newline at end of file diff --git a/components/oxylabs/actions/create-proxy-session/create-proxy-session.mjs b/components/oxylabs/actions/create-proxy-session/create-proxy-session.mjs new file mode 100644 index 0000000000000..f9132714c7b9e --- /dev/null +++ b/components/oxylabs/actions/create-proxy-session/create-proxy-session.mjs @@ -0,0 +1,85 @@ +import oxylabs from "../../oxylabs.app.mjs"; +import { ConfigurationError } from "@pipedream/platform"; + +export default { + key: "oxylabs-create-proxy-session", + name: "Create Proxy Session", + description: "Establish a proxy session using the Residential Proxy endpoint. [See the documentation](https://developers.oxylabs.io/proxies/residential-proxies/session-control#establishing-session)", + version: "0.0.1", + type: "action", + props: { + oxylabs, + username: { + type: "string", + label: "Username", + description: "The username for the proxy user", + }, + password: { + type: "string", + label: "Password", + description: "The password for the proxy user", + }, + sessid: { + type: "string", + label: "Session ID", + description: "Session ID to keep the same IP with upcoming queries. The session expires in 10 minutes. After that, a new IP address is assigned to that session ID. Random string, 0-9, and A-Z characters are supported.", + }, + cc: { + type: "string", + label: "Country Code", + description: "Case insensitive country code in 2-letter [3166-1 alpha-2 format](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2)", + optional: true, + }, + city: { + type: "string", + label: "City", + description: "Case insensitive city name in English. 
This parameter must be accompanied by cc for better accuracy.", + optional: true, + }, + st: { + type: "string", + label: "State", + description: "Case insensitive US state name with us_ in the beginning, for example, `us_california`, `us_illinois`", + optional: true, + }, + sstime: { + type: "string", + label: "Session Time", + description: "Session time in minutes. The session time parameter keeps the same IP for a certain period. The maximum session time is 30 minutes.", + optional: true, + }, + }, + async run({ $ }) { + const { + username, + password, + sessid, + cc, + city, + st, + sstime, + } = this; + + if (city && !cc) { + throw new ConfigurationError("City must be accompanied by country code"); + } + + const proxyUrl = `http://customer-${encodeURIComponent(username)}${cc + ? `-cc-${encodeURIComponent(cc)}` + : ""}${city + ? `-city-${encodeURIComponent(city)}` + : ""}${st + ? `-st-${encodeURIComponent(st)}` + : ""}${sessid + ? `-sessid-${encodeURIComponent(sessid)}` + : ""}${sstime + ? `-sstime-${encodeURIComponent(sstime)}` + : ""}:${encodeURIComponent(password)}@pr.oxylabs.io:7777`; + const response = await this.oxylabs.createSession({ + $, + proxyUrl, + }); + $.export("$summary", `Successfully created proxy session with session ID: ${this.sessid}`); + return response; + }, +}; diff --git a/components/oxylabs/actions/create-schedule/create-schedule.mjs b/components/oxylabs/actions/create-schedule/create-schedule.mjs new file mode 100644 index 0000000000000..3289ffd9c6eaf --- /dev/null +++ b/components/oxylabs/actions/create-schedule/create-schedule.mjs @@ -0,0 +1,40 @@ +import oxylabs from "../../oxylabs.app.mjs"; +import { parseObject } from "../../common/utils.mjs"; + +export default { + key: "oxylabs-create-schedule", + name: "Create Schedule", + description: "Create a schedule for a scraping job. [See the documentation](https://developers.oxylabs.io/scraping-solutions/web-scraper-api/features/scheduler#create-a-new-schedule)", + version: "0.0.1", + type: "action", + props: { + oxylabs, + chron: { + type: "string", + label: "Cron Expression", + description: "Cron schedule expression. 
It determines how often the submitted schedule will run. E.g. `0 3 * * 1`. Read more [here](https://crontab.guru/) and [here](https://docs.oracle.com/cd/E12058_01/doc/doc.1014/e12030/cron_expressions.htm).", + }, + items: { + type: "string[]", + label: "Items", + description: "List of Scraper APIs job parameter sets that should be executed as part of the schedule. E.g. `[{\"source\": \"universal\", \"url\": \"https://ip.oxylabs.io\"}]` [See the documentation](https://developers.oxylabs.io/scraping-solutions/web-scraper-api/features/scheduler#create-a-new-schedule) for more information.", + }, + endTime: { + type: "string", + label: "End Time", + description: "The time at which the schedule should stop running. E.g. `2032-12-21 12:34:45`", + }, + }, + async run({ $ }) { + const response = await this.oxylabs.createSchedule({ + $, + data: { + cron: this.chron, + items: parseObject(this.items), + end_time: this.endTime, + }, + }); + $.export("$summary", `Successfully created schedule: ${response.schedule_id}`); + return response; + }, +}; diff --git a/components/oxylabs/actions/scrape-url/scrape-url.mjs b/components/oxylabs/actions/scrape-url/scrape-url.mjs new file mode 100644 index 0000000000000..7a1dd4adb0367 --- /dev/null +++ b/components/oxylabs/actions/scrape-url/scrape-url.mjs @@ -0,0 +1,43 @@ +import oxylabs from "../../oxylabs.app.mjs"; +import constants from "../../common/constants.mjs"; + +export default { + key: "oxylabs-scrape-url", + name: "Scrape URL", + description: "Scrape a URL. 
[See the documentation](https://developers.oxylabs.io/scraping-solutions/web-scraper-api)", + version: "0.0.1", + type: "action", + props: { + oxylabs, + source: { + type: "string", + label: "Source", + description: "Sets the scraper that will be used to process your request", + options: constants.URL_SOURCES, + default: "universal", + }, + url: { + type: "string", + label: "URL", + description: "The URL to scrape", + }, + geoLocation: { + propDefinition: [ + oxylabs, + "geoLocation", + ], + }, + }, + async run({ $ }) { + const response = await this.oxylabs.scrape({ + $, + data: { + source: this.source, + url: this.url, + geo_location: this.geoLocation, + }, + }); + $.export("$summary", `Successfully scraped URL: ${this.url}`); + return response; + }, +}; diff --git a/components/oxylabs/actions/scrape-with-query/scrape-with-query.mjs b/components/oxylabs/actions/scrape-with-query/scrape-with-query.mjs new file mode 100644 index 0000000000000..01397af648a91 --- /dev/null +++ b/components/oxylabs/actions/scrape-with-query/scrape-with-query.mjs @@ -0,0 +1,42 @@ +import oxylabs from "../../oxylabs.app.mjs"; +import constants from "../../common/constants.mjs"; + +export default { + key: "oxylabs-scrape-with-query", + name: "Scrape with Query", + description: "Extract data using a search query. [See the documentation](https://developers.oxylabs.io/scraping-solutions/web-scraper-api)", + version: "0.0.1", + type: "action", + props: { + oxylabs, + source: { + type: "string", + label: "Source", + description: "Sets the scraper that will be used to process your request", + options: constants.QUERY_SOURCES, + }, + query: { + type: "string", + label: "Query", + description: "The query to search for. 
[See the documentation](https://developers.oxylabs.io/scraping-solutions/web-scraper-api/targets) for more information about specific sources/targets", + }, + geoLocation: { + propDefinition: [ + oxylabs, + "geoLocation", + ], + }, + }, + async run({ $ }) { + const response = await this.oxylabs.scrape({ + $, + data: { + source: this.source, + query: this.query, + geo_location: this.geoLocation, + }, + }); + $.export("$summary", `Successfully scraped using query: ${this.query}`); + return response; + }, +}; diff --git a/components/oxylabs/app/oxylabs.app.ts b/components/oxylabs/app/oxylabs.app.ts deleted file mode 100644 index 0b00b99a1d796..0000000000000 --- a/components/oxylabs/app/oxylabs.app.ts +++ /dev/null @@ -1,13 +0,0 @@ -import { defineApp } from "@pipedream/types"; - -export default defineApp({ - type: "app", - app: "oxylabs", - propDefinitions: {}, - methods: { - // this.$auth contains connected account data - authKeys() { - console.log(Object.keys(this.$auth)); - }, - }, -}); \ No newline at end of file diff --git a/components/oxylabs/common/constants.mjs b/components/oxylabs/common/constants.mjs new file mode 100644 index 0000000000000..cebcb1f1bdf41 --- /dev/null +++ b/components/oxylabs/common/constants.mjs @@ -0,0 +1,37 @@ +const URL_SOURCES = [ + "universal", + "amazon", + "google", + "bing", + "kroger", +]; + +const QUERY_SOURCES = [ + "amazon_product", + "amazon_search", + "amazon_pricing", + "amazon_sellers", + "amazon_bestsellers", + "amazon_reviews", + "amazon_questions", + "google_search", + "google_ads", + "google_images", + "google_lens", + "google_maps", + "google_travel_hotels", + "google_suggest", + "google_trends_explore", + "google_shopping_product", + "google_shopping_search", + "google_shopping_pricing", + "bing_search", + "kroger_product", + "kroger_search", + "youtube_transcript", +]; + +export default { + URL_SOURCES, + QUERY_SOURCES, +}; diff --git a/components/oxylabs/common/utils.mjs b/components/oxylabs/common/utils.mjs new 
file mode 100644 index 0000000000000..9fdc004c18f32 --- /dev/null +++ b/components/oxylabs/common/utils.mjs @@ -0,0 +1,25 @@ +export const parseObject = (obj) => { + if (!obj) { + return undefined; + } + if (typeof obj === "string") { + try { + return JSON.parse(obj); + } catch (e) { + return obj; + } + } + if (Array.isArray(obj)) { + return obj.map(parseObject); + } + if (typeof obj === "object") { + return Object.fromEntries(Object.entries(obj).map(([ + key, + value, + ]) => [ + key, + parseObject(value), + ])); + } + return obj; +}; diff --git a/components/oxylabs/oxylabs.app.mjs b/components/oxylabs/oxylabs.app.mjs new file mode 100644 index 0000000000000..da2556a6b55aa --- /dev/null +++ b/components/oxylabs/oxylabs.app.mjs @@ -0,0 +1,82 @@ +import { axios } from "@pipedream/platform"; +import { HttpsProxyAgent } from "https-proxy-agent"; + +export default { + type: "app", + app: "oxylabs", + propDefinitions: { + scheduleId: { + type: "string", + label: "Schedule ID", + description: "The ID of the schedule to watch", + async options() { + const { schedules } = await this.listSchedules(); + return schedules || []; + }, + }, + geoLocation: { + type: "string", + label: "Geo Location", + description: "The geo location to scrape from. 
[See the guide](https://developers.oxylabs.io/scraping-solutions/web-scraper-api/features/localization/e-commerce-localization) for using this property.", + optional: true, + }, + }, + methods: { + _getBaseUrl() { + return `https://${this.$auth.api_name}.oxylabs.io/v1`; + }, + _makeRequest({ + $ = this, path, ...opts + }) { + return axios($, { + url: `${this._getBaseUrl()}${path}`, + headers: { + "Content-Type": "application/json", + }, + auth: { + username: `${this.$auth.username}`, + password: `${this.$auth.password}`, + }, + ...opts, + }); + }, + scrape(opts = {}) { + return this._makeRequest({ + method: "POST", + path: "/queries", + ...opts, + }); + }, + listSchedules(opts = {}) { + return this._makeRequest({ + path: "/schedules", + ...opts, + }); + }, + async createSession({ + $ = this, proxyUrl, ...opts + }) { + const agent = new HttpsProxyAgent(proxyUrl); + return axios($, { + url: "https://ip.oxylabs.io/location", + httpsAgent: agent, + ...opts, + }); + }, + createSchedule(opts = {}) { + return this._makeRequest({ + method: "POST", + path: "/schedules", + ...opts, + }); + }, + getRunsInfo({ + scheduleId, ...opts + }) { + return this._makeRequest({ + path: `/schedules/${scheduleId}/runs`, + ...opts, + }); + }, + }, +}; diff --git a/components/oxylabs/package.json b/components/oxylabs/package.json index 25bc77da11089..8f4c393fde633 100644 --- a/components/oxylabs/package.json +++ b/components/oxylabs/package.json @@ -1,16 +1,19 @@ { "name": "@pipedream/oxylabs", - "version": "0.0.2", + "version": "0.1.0", "description": "Pipedream Oxylabs Components", - "main": "dist/app/oxylabs.app.mjs", + "main": "oxylabs.app.mjs", "keywords": [ "pipedream", "oxylabs" ], - "files": ["dist"], "homepage": "https://pipedream.com/apps/oxylabs", "author": "Pipedream (https://pipedream.com/)", "publishConfig": { "access": "public" + }, + "dependencies": { + "@pipedream/platform": "^3.1.0", + "https-proxy-agent": "^7.0.6" } } diff --git 
a/components/oxylabs/sources/new-scheduled-run-completed/new-scheduled-run-completed.mjs b/components/oxylabs/sources/new-scheduled-run-completed/new-scheduled-run-completed.mjs new file mode 100644 index 0000000000000..0ce8a5424fc47 --- /dev/null +++ b/components/oxylabs/sources/new-scheduled-run-completed/new-scheduled-run-completed.mjs @@ -0,0 +1,47 @@ +import oxylabs from "../../oxylabs.app.mjs"; +import { DEFAULT_POLLING_SOURCE_TIMER_INTERVAL } from "@pipedream/platform"; +import sampleEmit from "./test-event.mjs"; + +export default { + key: "oxylabs-new-scheduled-run-completed", + name: "New Scheduled Run Completed", + description: "Emit new event when a new scheduled run is completed. [See the documentation](https://developers.oxylabs.io/scraping-solutions/web-scraper-api/features/scheduler#get-runs-information)", + version: "0.0.1", + type: "source", + dedupe: "unique", + props: { + oxylabs, + timer: { + type: "$.interface.timer", + default: { + intervalSeconds: DEFAULT_POLLING_SOURCE_TIMER_INTERVAL, + }, + }, + scheduleId: { + propDefinition: [ + oxylabs, + "scheduleId", + ], + }, + }, + methods: { + generateMeta(run) { + // Run objects are keyed by `run_id` (see test-event.mjs); they carry no top-level `id`, so dedupe must use `run_id`. + return { + id: run.run_id, + summary: `New Run with ID: ${run.run_id}`, + ts: Date.now(), + }; + }, + }, + async run() { + const { runs } = await this.oxylabs.getRunsInfo({ + scheduleId: this.scheduleId, + }); + for (const run of runs) { + const meta = this.generateMeta(run); + this.$emit(run, meta); + } + }, + sampleEmit, +}; diff --git a/components/oxylabs/sources/new-scheduled-run-completed/test-event.mjs b/components/oxylabs/sources/new-scheduled-run-completed/test-event.mjs new file mode 100644 index 0000000000000..3a6d59ceba271 --- /dev/null +++ b/components/oxylabs/sources/new-scheduled-run-completed/test-event.mjs @@ -0,0 +1,27 @@ +export default { + "run_id": 25037485, + "jobs": [ + { + "id": 7300439540206948353, + "create_status_code": 202, + "result_status": "done", + "created_at": "2025-02-26 09:00:21", + "result_created_at": 
"2025-02-26 09:00:23", + }, + { + "id": 7300439540169188353, + "create_status_code": 202, + "result_status": "done", + "created_at": "2025-02-26 09:00:21", + "result_created_at": "2025-02-26 09:00:22", + }, + { + "id": 7300439540198551553, + "create_status_code": 202, + "result_status": "done", + "created_at": "2025-02-26 09:00:21", + "result_created_at": "2025-02-26 09:00:23", + }, + ], + "success_rate": 1, +}; diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index fcdd09748031d..5a063b36a6aae 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -9528,7 +9528,14 @@ importers: components/oxford_dictionaries: {} - components/oxylabs: {} + components/oxylabs: + dependencies: + '@pipedream/platform': + specifier: ^3.1.0 + version: 3.1.0 + https-proxy-agent: + specifier: ^7.0.6 + version: 7.0.6 components/oyster: dependencies: @@ -15843,7 +15850,7 @@ importers: version: 3.1.7 ts-jest: specifier: ^29.2.5 - version: 29.2.5(@babel/core@8.0.0-alpha.13)(@jest/transform@29.7.0)(@jest/types@29.6.3)(babel-jest@29.7.0(@babel/core@8.0.0-alpha.13))(jest@29.7.0(@types/node@20.17.30)(babel-plugin-macros@3.1.0))(typescript@5.7.2) + version: 29.2.5(@babel/core@7.26.0)(@jest/transform@29.7.0)(@jest/types@29.6.3)(babel-jest@29.7.0(@babel/core@7.26.0))(jest@29.7.0(@types/node@20.17.30)(babel-plugin-macros@3.1.0))(typescript@5.7.2) tsup: specifier: ^8.3.6 version: 8.3.6(@microsoft/api-extractor@7.47.12(@types/node@20.17.30))(jiti@1.21.6)(postcss@8.4.49)(tsx@4.19.4)(typescript@5.7.2)(yaml@2.6.1) @@ -15886,7 +15893,7 @@ importers: version: 3.1.0 jest: specifier: ^29.1.2 - version: 29.7.0(@types/node@20.17.30)(babel-plugin-macros@3.1.0) + version: 29.7.0(@types/node@20.17.6)(babel-plugin-macros@3.1.0) type-fest: specifier: ^4.15.0 version: 4.27.0 @@ -21221,6 +21228,10 @@ packages: resolution: {integrity: sha512-H0TSyFNDMomMNJQBn8wFV5YC/2eJ+VXECwOadZJT554xP6cODZHPX3H9QMQECxvrgiSOP1pHjy1sMWQVYJOUOA==} engines: {node: '>= 14'} + agent-base@7.1.3: + resolution: {integrity: 
sha512-jRR5wdylq8CkOe6hei19GGZnxM6rBGwFl3Bg0YItGDimvjGtAvdZk4Pu6Cl4u4Igsws4a1fd1Vq3ezrhn4KmFw==} + engines: {node: '>= 14'} + agentkeepalive@4.5.0: resolution: {integrity: sha512-5GG/5IbQQpC9FpkRGsSvZI5QYeSCzlJHdpBQntCsuTOxhKD8lqKhrleg2Yi7yvMIf82Ycmmqln9U8V9qwEiJew==} engines: {node: '>= 8.0.0'} @@ -24752,8 +24763,8 @@ packages: resolution: {integrity: sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==} engines: {node: '>= 6'} - https-proxy-agent@7.0.5: - resolution: {integrity: sha512-1e4Wqeblerz+tMKPIq2EMGiiWW1dIjZOksyHWSUm1rmuvw/how9hBHZ38lAGj5ID4Ik6EdkOw7NmWPy6LAwalw==} + https-proxy-agent@7.0.6: + resolution: {integrity: sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==} engines: {node: '>= 14'} https@1.0.0: @@ -32546,7 +32557,7 @@ snapshots: '@azure/core-util': 1.11.0 '@azure/logger': 1.1.4 http-proxy-agent: 7.0.2 - https-proxy-agent: 7.0.5 + https-proxy-agent: 7.0.6 tslib: 2.8.1 transitivePeerDependencies: - supports-color @@ -38537,6 +38548,8 @@ snapshots: transitivePeerDependencies: - supports-color + agent-base@7.1.3: {} + agentkeepalive@4.5.0: dependencies: humanize-ms: 1.2.1 @@ -42209,7 +42222,7 @@ snapshots: gaxios@6.7.1: dependencies: extend: 3.0.2 - https-proxy-agent: 7.0.5 + https-proxy-agent: 7.0.6 is-stream: 2.0.1 node-fetch: 2.7.0 uuid: 9.0.1 @@ -42220,7 +42233,7 @@ snapshots: gaxios@7.0.0: dependencies: extend: 3.0.2 - https-proxy-agent: 7.0.5 + https-proxy-agent: 7.0.6 node-fetch: 3.3.2 transitivePeerDependencies: - supports-color @@ -43206,9 +43219,9 @@ snapshots: transitivePeerDependencies: - supports-color - https-proxy-agent@7.0.5: + https-proxy-agent@7.0.6: dependencies: - agent-base: 7.1.1 + agent-base: 7.1.3 debug: 4.4.0 transitivePeerDependencies: - supports-color @@ -46758,7 +46771,7 @@ snapshots: debug: 4.4.0 get-uri: 6.0.3 http-proxy-agent: 7.0.2 - https-proxy-agent: 7.0.5 + https-proxy-agent: 7.0.6 pac-resolver: 7.0.1 socks-proxy-agent: 8.0.4 
transitivePeerDependencies: @@ -47318,7 +47331,7 @@ snapshots: agent-base: 7.1.1 debug: 4.4.0 http-proxy-agent: 7.0.2 - https-proxy-agent: 7.0.5 + https-proxy-agent: 7.0.6 lru-cache: 7.18.3 pac-proxy-agent: 7.0.2 proxy-from-env: 1.1.0 @@ -49807,7 +49820,7 @@ snapshots: ts-interface-checker@0.1.13: {} - ts-jest@29.2.5(@babel/core@8.0.0-alpha.13)(@jest/transform@29.7.0)(@jest/types@29.6.3)(babel-jest@29.7.0(@babel/core@8.0.0-alpha.13))(jest@29.7.0(@types/node@20.17.30)(babel-plugin-macros@3.1.0))(typescript@5.7.2): + ts-jest@29.2.5(@babel/core@7.26.0)(@jest/transform@29.7.0)(@jest/types@29.6.3)(babel-jest@29.7.0(@babel/core@7.26.0))(jest@29.7.0(@types/node@20.17.30)(babel-plugin-macros@3.1.0))(typescript@5.7.2): dependencies: bs-logger: 0.2.6 ejs: 3.1.10 @@ -49821,10 +49834,10 @@ snapshots: typescript: 5.7.2 yargs-parser: 21.1.1 optionalDependencies: - '@babel/core': 8.0.0-alpha.13 + '@babel/core': 7.26.0 '@jest/transform': 29.7.0 '@jest/types': 29.6.3 - babel-jest: 29.7.0(@babel/core@8.0.0-alpha.13) + babel-jest: 29.7.0(@babel/core@7.26.0) ts-jest@29.2.5(@babel/core@8.0.0-alpha.13)(@jest/transform@29.7.0)(@jest/types@29.6.3)(babel-jest@29.7.0(@babel/core@8.0.0-alpha.13))(jest@29.7.0(@types/node@20.17.6)(babel-plugin-macros@3.1.0))(typescript@5.6.3): dependencies: