diff --git a/components/crawlbase/actions/crawl-url/crawl-url.mjs b/components/crawlbase/actions/crawl-url/crawl-url.mjs
new file mode 100644
index 0000000000000..cd8151841c4f0
--- /dev/null
+++ b/components/crawlbase/actions/crawl-url/crawl-url.mjs
@@ -0,0 +1,74 @@
+import crawlbase from "../../crawlbase.app.mjs";
+import constants from "../../common/constants.mjs";
+
+export default {
+  key: "crawlbase-crawl-url",
+  name: "Crawl URL",
+  description: "Crawl a URL. [See the documentation](https://crawlbase.com/docs/crawling-api/)",
+  version: "0.0.1",
+  type: "action",
+  annotations: {
+    destructiveHint: false,
+    openWorldHint: true,
+    readOnlyHint: false,
+  },
+  props: {
+    crawlbase,
+    url: {
+      type: "string",
+      label: "URL",
+      description: "The URL to crawl",
+    },
+    scraper: {
+      type: "string",
+      label: "Scraper",
+      description: "The scraper to use",
+      options: constants.SCRAPERS,
+      optional: true,
+    },
+    screenshot: {
+      type: "boolean",
+      label: "Screenshot",
+      description: "Set to `true` to take a screenshot of the page. Must use a JavaScript token for authentication.",
+      optional: true,
+    },
+    store: {
+      type: "boolean",
+      label: "Store",
+      description: "Set to `true` to store a copy of the API response in the Crawlbase Cloud Storage",
+      optional: true,
+    },
+    getHeaders: {
+      type: "boolean",
+      label: "Get Headers",
+      description: "Set to `true` to get the headers of the page",
+      optional: true,
+    },
+  },
+  /**
+   * Sends the configured props to Crawlbase through the app's `makeRequest`
+   * helper and returns the response unchanged.
+   *
+   * @param {object} context
+   * @param {object} context.$ - Step context; forwarded to `makeRequest` and
+   *   used to export the run summary.
+   * @returns {Promise<*>} Whatever `makeRequest` resolves to.
+   */
+  async run({ $ }) {
+    const response = await this.crawlbase.makeRequest({
+      $,
+      params: {
+        url: this.url,
+        scraper: this.scraper,
+        screenshot: this.screenshot,
+        store: this.store,
+        get_headers: this.getHeaders,
+        // The `json` format is always requested, regardless of the props.
+        format: "json",
+      },
+    });
+
+    $.export("$summary", `Successfully crawled URL: ${this.url}`);
+    return response;
+  },
+};
diff --git a/components/crawlbase/common/constants.mjs b/components/crawlbase/common/constants.mjs
new file mode 100644
index 0000000000000..71a74e125e329
--- /dev/null
+++ b/components/crawlbase/common/constants.mjs
@@ -0,0 +1,49 @@
+// Scraper identifiers offered as the options of the Crawl URL action's `scraper` prop.
+const SCRAPERS = [
+  "amazon-product-details",
+  "amazon-serp",
+  "amazon-offer-listing",
+  "amazon-best-sellers",
+  "amazon-new-releases",
+  "google-serp",
+  "google-product-offers",
+  "facebook-group",
+  "facebook-page",
+  "facebook-profile",
+  "facebook-hashtag",
+  "facebook-event",
+  "instagram-reel",
+  "instagram-post",
+  "instagram-profile",
+  "instagram-reels-audio",
+  "tiktok-product",
+  "tiktok-shop",
+  "tiktok-profile",
+  "shein-product",
+  "linkedin-profile",
+  "linkedin-company",
+  "linkedin-feed",
+  "quora-question",
+  "airbnb-serp",
+  "ebay-serp",
+  "ebay-product",
+  "ebay-seller-shop",
+  "aliexpress-product",
+  "aliexpress-serp",
+  "bing-serp",
+  "immobilienscout24-property",
+  "walmart-serp",
+  "walmart-product-details",
+  "walmart-category",
+  "bestbuy-serp",
+  "bestbuy-product-details",
+  "g2-product-reviews",
+  "eventbrite-events-list",
+  "eventbrite-event-details",
+  "generic-extractor",
+  "email-extractor",
+];
+
+export default {
+  SCRAPERS,
+};
diff --git a/components/crawlbase/crawlbase.app.mjs b/components/crawlbase/crawlbase.app.mjs
index c15a9e54f4f5d..16c631dd3e05a 100644
--- a/components/crawlbase/crawlbase.app.mjs
+++ b/components/crawlbase/crawlbase.app.mjs
@@ -1,11 +1,39 @@
+import { axios } from "@pipedream/platform";
+
 export default {
   type: "app",
   app: "crawlbase",
   propDefinitions: {},
   methods: {
-    // this.$auth contains connected account data
-    authKeys() {
-      console.log(Object.keys(this.$auth));
+    /**
+     * @returns {string} Root URL that every request path is appended to.
+     */
+    _baseUrl() {
+      return "https://api.crawlbase.com";
+    },
+    /**
+     * Calls the Crawlbase API through the platform `axios` helper, adding the
+     * connected account's `api_token` as the `token` query parameter. Any
+     * other keys of the argument object are spread into the `axios` config.
+     *
+     * @param {object} args
+     * @param {object} [args.$=this] - Passed as the first `axios` argument.
+     * @param {string} [args.path=""] - Appended to the base URL.
+     * @param {object} [args.params={}] - Query parameters; `token` is written
+     *   last, so a caller-supplied `token` is overridden.
+     * @returns {*} The value returned by the `axios` call (callers await it).
+     */
+    makeRequest({
+      $ = this, path = "", params = {}, ...opts
+    }) {
+      return axios($, {
+        url: `${this._baseUrl()}${path}`,
+        params: {
+          ...params,
+          token: this.$auth.api_token,
+        },
+        ...opts,
+      });
     },
   },
-};
\ No newline at end of file
+};
diff --git a/components/crawlbase/package.json b/components/crawlbase/package.json
index c8f60455b48ce..c909b0ce1fca9 100644
--- a/components/crawlbase/package.json
+++ b/components/crawlbase/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@pipedream/crawlbase",
-  "version": "0.0.1",
+  "version": "0.1.0",
   "description": "Pipedream Crawlbase Components",
   "main": "crawlbase.app.mjs",
   "keywords": [
@@ -11,5 +11,8 @@
   "author": "Pipedream (https://pipedream.com/)",
   "publishConfig": {
     "access": "public"
+  },
+  "dependencies": {
+    "@pipedream/platform": "^3.1.0"
   }
 }
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index c733edd6811f7..00e49c060c84c 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -3331,7 +3331,11 @@ importers:
 
   components/cratedb_cloud: {}
 
-  components/crawlbase: {}
+  components/crawlbase:
+    dependencies:
+      '@pipedream/platform':
+        specifier: ^3.1.0
+        version: 3.1.0
 
   components/credit_repair_cloud:
     dependencies:
@@ -13828,8 +13832,7 @@ importers:
 
   components/stack_overflow_for_teams: {}
 
-  components/stackby:
-    specifiers: {}
+  components/stackby: {}
 
   components/stackshare_api: {}
 