
Commit 6a0fe3f

[components] Scrapeless - add new actions (#17086)
* [components] Scrapeless - new actions
  - Introduced a new README.md for Scrapeless, outlining its features and getting-started guide.
  - Implemented multiple Scrapeless actions, including `Crawler`, `Scraping API`, and `Universal Scraping API`.
* fix(scrapeless):
  - Fix lint errors
  - Fix component check error
* Update Scrapeless component to version 0.2.0
  - Use the Node integration server to simplify request logic
* feat(scrapeless): Update descriptions for Scrapeless actions to include documentation links
* feat(scrapeless): Integrate Scrapeless AI SDK and enhance API functionality
  - Added @scrapeless-ai/sdk as a dependency.
  - Updated API endpoints for scraping and crawling functionality.
  - Implemented error handling and job management for scraping tasks.
  - Refactored existing methods to use the new SDK for improved performance and reliability.
* feat(scrapeless): update actions
  - Fix request URLs for `submit-scrape-job` and `get-scrape-result` actions
  - Refactor `submit-scrape-job` input props to align with Scrapeless's official API parameters
  - Fix issue with retrieving additional props asynchronously
* fix(scrapeless): downgrade action versions for consistency
1 parent 43f5a2f commit 6a0fe3f

11 files changed: +2168 -121 lines changed


components/scrapeless/README.md

Lines changed: 16 additions & 0 deletions
@@ -0,0 +1,16 @@
# Overview

Scrapeless – your go-to platform for powerful, compliant web data extraction. With tools like the Universal Scraping API, Scrapeless makes it easy to access and gather data from complex sites. Focus on insights while we handle the technical hurdles. Scrapeless – data extraction made simple.

# Example Use Cases

1. **Scraping API**: Endpoints for fresh, structured data from 100+ popular sites.
2. **Universal Scraping API**: Access any website at scale and say goodbye to blocks.
3. **Crawler**: Extract data from single pages or traverse entire domains.

# Getting Started

## Generating an API Key

1. If you are not yet a Scrapeless member, you can sign up for a free account at [Scrapeless](https://app.scrapeless.com/passport/register).
2. Once registered, go to the API Key Management page in the app settings to generate an API Key.
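For orientation, here is a minimal sketch of where that key ends up when using the Scrapeless AI SDK this component depends on. The `Scrapeless` constructor name and `apiKey` option are assumptions about @scrapeless-ai/sdk; check the SDK docs for the exact signature:

```js
// Minimal sketch, assuming @scrapeless-ai/sdk exports a `Scrapeless`
// client class that accepts an `apiKey` option — verify against the SDK docs.
import { Scrapeless } from "@scrapeless-ai/sdk";

const client = new Scrapeless({
  apiKey: process.env.SCRAPELESS_API_KEY, // the key generated in app settings
});
```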
components/scrapeless/actions/crawler/crawler.mjs

Lines changed: 89 additions & 0 deletions
@@ -0,0 +1,89 @@
import scrapeless from "../../scrapeless.app.mjs";

export default {
  key: "scrapeless-crawler",
  name: "Crawler",
  description: "Crawl any website at scale and say goodbye to blocks. [See the documentation](https://apidocs.scrapeless.com/api-17509010).",
  version: "0.0.2",
  type: "action",
  props: {
    scrapeless,
    apiServer: {
      type: "string",
      label: "Please select an API server",
      description: "Please select an API server to use",
      default: "crawl",
      options: [
        {
          label: "Crawl",
          value: "crawl",
        },
        {
          label: "Scrape",
          value: "scrape",
        },
      ],
      reloadProps: true,
    },
  },
  async run({ $ }) {
    const {
      scrapeless, apiServer, ...inputProps
    } = this;

    // Browser session options shared by both endpoints
    const browserOptions = {
      "proxy_country": "ANY",
      "session_name": "Crawl",
      "session_recording": true,
      "session_ttl": 900,
    };

    let response;

    if (apiServer === "crawl") {
      response =
        await scrapeless._scrapelessClient().scrapingCrawl.crawl.crawlUrl(inputProps.url, {
          limit: inputProps.limitCrawlPages,
          browserOptions,
        });
    }

    if (apiServer === "scrape") {
      response =
        await scrapeless._scrapelessClient().scrapingCrawl.scrape.scrapeUrl(inputProps.url, {
          browserOptions,
        });
    }

    if (response?.status === "completed" && response?.data) {
      $.export("$summary", `Successfully retrieved crawling results for ${inputProps.url}`);
      return response.data;
    } else {
      throw new Error(response?.error || "Failed to retrieve crawling results");
    }
  },
  // Expose extra input props that depend on the selected API server
  additionalProps() {
    const { apiServer } = this;

    const props = {};

    if (apiServer === "crawl" || apiServer === "scrape") {
      props.url = {
        type: "string",
        label: "URL to Crawl",
        description: "If you want to crawl in batches, please refer to the SDK documentation",
      };
    }

    if (apiServer === "crawl") {
      props.limitCrawlPages = {
        type: "integer",
        label: "Number Of Subpages",
        default: 5,
        description: "Max number of results to return",
      };
    }

    return props;
  },
};
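Outside of Pipedream, the action's "crawl" branch corresponds to a direct SDK call. A sketch under the assumption that @scrapeless-ai/sdk exposes a `Scrapeless` client carrying the same `scrapingCrawl.crawl.crawlUrl` method the action invokes:

```js
// Sketch: the "crawl" branch above as a standalone script. The
// crawlUrl call and browserOptions mirror the action code exactly;
// the standalone `Scrapeless` client construction is an assumption.
import { Scrapeless } from "@scrapeless-ai/sdk";

const client = new Scrapeless({ apiKey: process.env.SCRAPELESS_API_KEY });

const response = await client.scrapingCrawl.crawl.crawlUrl("https://example.com", {
  limit: 5, // the action's default for limitCrawlPages
  browserOptions: {
    proxy_country: "ANY",
    session_name: "Crawl",
    session_recording: true,
    session_ttl: 900,
  },
});

if (response?.status === "completed") {
  console.log(response.data); // the crawl results
} else {
  console.error(response?.error || "Crawl failed");
}
```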

components/scrapeless/actions/get-scrape-result/get-scrape-result.mjs

Lines changed: 1 addition & 1 deletion
@@ -4,7 +4,7 @@ export default {
   key: "scrapeless-get-scrape-result",
   name: "Get Scrape Result",
   description: "Retrieve the result of a completed scraping job. [See the documentation](https://apidocs.scrapeless.com/api-11949853)",
-  version: "0.0.1",
+  version: "0.0.2",
   type: "action",
   props: {
     scrapeless,
components/scrapeless/actions/scraping-api/scraping-api.mjs

Lines changed: 124 additions & 0 deletions
@@ -0,0 +1,124 @@
import scrapeless from "../../scrapeless.app.mjs";
import { log } from "../../common/utils.mjs";
export default {
  key: "scrapeless-scraping-api",
  name: "Scraping API",
  description: "Endpoints for fresh, structured data from 100+ popular sites. [See the documentation](https://apidocs.scrapeless.com/api-12919045).",
  version: "0.0.1",
  type: "action",
  props: {
    scrapeless,
    apiServer: {
      type: "string",
      label: "Please select an API server",
      default: "googleSearch",
      description: "Please select an API server to use",
      options: [
        {
          label: "Google Search",
          value: "googleSearch",
        },
      ],
      reloadProps: true,
    },
  },
  async run({ $ }) {
    const {
      scrapeless, apiServer, ...inputProps
    } = this;

    const MAX_RETRIES = 3;
    // 10 seconds
    const DELAY = 1000 * 10;
    const { run } = $.context;

    let submitData;
    let job;

    // Pre-check whether a job is already carried in the rerun context
    if (run?.context?.job) {
      job = run.context.job;
    }

    if (apiServer === "googleSearch") {
      submitData = {
        actor: "scraper.google.search",
        input: {
          q: inputProps.q,
          hl: inputProps.hl,
          gl: inputProps.gl,
        },
      };
    }

    if (!submitData) {
      throw new Error("No actor found");
    }
    // 1. Create a new scraping job
    if (!job) {
      job = await scrapeless._scrapelessClient().deepserp.createTask({
        actor: submitData.actor,
        input: submitData.input,
      });

      if (job.status === 200) {
        $.export("$summary", "Successfully retrieved scraping results");
        return job.data;
      }

      log("task in progress");
    }

    // 2. Wait for the job to complete
    if (run.runs === 1) {
      $.flow.rerun(DELAY, {
        job,
      }, MAX_RETRIES);
    } else if (run.runs > MAX_RETRIES) {
      throw new Error("Max retries reached");
    } else if (job?.data?.taskId) {
      const result = await scrapeless._scrapelessClient().deepserp.getTaskResult(job.data.taskId);
      if (result.status === 200) {
        $.export("$summary", "Successfully retrieved scraping results");
        return result.data;
      } else {
        $.flow.rerun(DELAY, {
          job,
        }, MAX_RETRIES);
      }
    } else {
      throw new Error("No job found");
    }
  },
  additionalProps() {
    const { apiServer } = this;

    const props = {};

    if (apiServer === "googleSearch") {
      props.q = {
        type: "string",
        label: "Search Query",
        description: "Parameter defines the query you want to search. You can use anything that you would use in a regular Google search, e.g. `inurl:`, `site:`, `intitle:`. Advanced search query parameters such as `as_dt` and `as_eq` are also supported.",
        default: "coffee",
      };

      props.hl = {
        type: "string",
        label: "Language",
        description: "Parameter defines the language to use for the Google search. It's a two-letter language code (e.g., `en` for English, `es` for Spanish, or `fr` for French).",
        default: "en",
      };

      props.gl = {
        type: "string",
        label: "Country",
        description: "Parameter defines the country to use for the Google search. It's a two-letter country code (e.g., `us` for the United States, `uk` for the United Kingdom, or `fr` for France).",
        default: "us",
      };
    }

    return props;
  },
};
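The run() method above polls with Pipedream's `$.flow.rerun(delay, context, maxRetries)`: the step suspends, re-invokes itself after `delay` ms, and gets its carried state back under `$.context.run.context`. A stripped-down sketch of that pattern, where `checkTask` is a hypothetical stand-in for `deepserp.getTaskResult`:

```js
// Stripped-down poll-and-rerun sketch. `checkTask` is a hypothetical
// status probe; $.context.run and $.flow.rerun follow the same Pipedream
// step API used by the action above.
async function checkTask(job) {
  // Hypothetical: report whether a previously submitted job has finished
  return { done: false, data: null, job };
}

export default {
  key: "polling-example", // hypothetical action key
  name: "Polling Example",
  version: "0.0.1",
  type: "action",
  async run({ $ }) {
    const MAX_RETRIES = 3;
    const DELAY = 10 * 1000; // wait 10 seconds between polls
    const { run } = $.context;

    if (run.runs > MAX_RETRIES) {
      throw new Error("Max retries reached");
    }

    // State passed to rerun() comes back on the next invocation
    const job = run.context?.job;
    const result = await checkTask(job);
    if (result.done) {
      return result.data;
    }

    // Not finished: suspend and re-run this step in DELAY ms
    return $.flow.rerun(DELAY, { job: result.job }, MAX_RETRIES);
  },
};
```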
components/scrapeless/actions/submit-scrape-job/submit-scrape-job.mjs

Lines changed: 20 additions & 28 deletions
@@ -1,76 +1,68 @@
-import { ConfigurationError } from "@pipedream/platform";
-import { ACTOR_OPTIONS } from "../../common/constants.mjs";
-import { parseObject } from "../../common/utils.mjs";
+import { COUNTRY_OPTIONS } from "../../common/constants.mjs";
+import { log } from "../../common/utils.mjs";
 import scrapeless from "../../scrapeless.app.mjs";
 
 export default {
   key: "scrapeless-submit-scrape-job",
   name: "Submit Scrape Job",
   description: "Submit a new web scraping job with specified target URL and extraction rules. [See the documentation](https://apidocs.scrapeless.com/api-11949852)",
-  version: "0.0.1",
+  version: "0.0.2",
   type: "action",
   props: {
     scrapeless,
     actor: {
       type: "string",
       label: "Actor",
+      default: "scraper.shopee",
       description: "The actor to use for the scrape job. This can be a specific user or a system account.",
-      options: ACTOR_OPTIONS,
     },
     inputUrl: {
       type: "string",
       label: "Input URL",
       description: "Target URL to scrape. This is the URL of the web page you want to extract data from.",
-      optional: true,
     },
     proxyCountry: {
       type: "string",
       label: "Proxy Country",
       description: "The country to route the request through. This can help in bypassing geo-restrictions.",
-      optional: true,
-    },
-    additionalInput: {
-      type: "object",
-      label: "Additional Input",
-      description: "Additional input parameters if you need to pass a specific configuration based on the actor. [See the documentation](https://apidocs.scrapeless.com/) for further details.",
-      optional: true,
+      default: "ANY",
+      options: COUNTRY_OPTIONS.map((country) => ({
+        label: country.label,
+        value: country.value,
+      })),
     },
     asyncMode: {
       type: "boolean",
       label: "Async Mode",
+      default: true,
       description: "Whether to run the scrape job in asynchronous mode. If set to true, the job will be processed in the background.",
     },
   },
   async run({ $ }) {
     try {
       const data = {
         actor: this.actor,
-        input: parseObject(this.additionalInput),
-      };
-
-      if (this.asyncMode) {
-        data.async = this.asyncMode;
-      }
-      if (this.inputUrl) {
-        data.input.url = this.inputUrl;
-      }
-      if (this.proxyCountry) {
-        data.proxy = {
+        input: {
+          url: this.inputUrl,
+        },
+        proxy: {
           country: this.proxyCountry,
-        };
-      }
+        },
+        async: this.asyncMode,
+      };
 
       const response = await this.scrapeless.submitScrapeJob({
         $,
         data,
       });
+      log(response);
 
       $.export("$summary", this.asyncMode
         ? `Successfully submitted scrape job with ID: ${response.taskId}`
         : "Successfully scraped the target configuration.");
       return response;
-    } catch ({ response }) {
-      throw new ConfigurationError(response.data.message);
+    } catch (error) {
+      throw new Error(error.message);
     }
   },
 };
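The net effect of this refactor is that run() now always submits one fixed-shape payload instead of conditionally assembled fields, which keeps the request logic branch-free. An illustrative instance of that `data` object — the field names come from the diff above, while the target URL is a hypothetical example value:

```js
// Illustrative payload mirroring the refactored `data` object above.
// The shape (actor / input.url / proxy.country / async) comes from the
// diff; the URL is a hypothetical example, the rest are the prop defaults.
const data = {
  actor: "scraper.shopee", // default Actor prop
  input: {
    url: "https://example.com/product/123", // hypothetical Input URL
  },
  proxy: {
    country: "ANY", // default Proxy Country
  },
  async: true, // default Async Mode
};
```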