
Commit cbdef09 (parent: f8de313)

Update Scrapeless component to version 0.2.0

- use the nodes integration server to simplify the request logic

4 files changed: +17 −168 lines

components/scrapeless/actions/scraping-api/scraping-api.mjs

Lines changed: 3 additions & 5 deletions

@@ -30,11 +30,9 @@ export default {
     if (apiServer === "googleSearch") {
       const submitData = {
         actor: "scraper.google.search",
-        input: {
-          q: inputProps.q,
-          hl: inputProps.hl,
-          gl: inputProps.gl,
-        },
+        q: inputProps.q,
+        hl: inputProps.hl,
+        gl: inputProps.gl,
       };
       const response = await scrapeless.scrapingApi({
         $,
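
For context, a sketch of the payload change above: the Google Search parameters move from a nested `input` object to the top level of `submitData`. The `actor` value and field names come straight from the diff; the literal values are placeholders for illustration.

```javascript
// Hypothetical before/after payloads for the Google Search action.
// Keys come from the diff; the values are made up for illustration.

// 0.1.x shape: search params nested under `input`
const oldPayload = {
  actor: "scraper.google.search",
  input: {
    q: "pipedream",
    hl: "en",
    gl: "us",
  },
};

// 0.2.0 shape: the same params flattened onto the payload itself
const newPayload = {
  actor: "scraper.google.search",
  q: "pipedream",
  hl: "en",
  gl: "us",
};

console.log(oldPayload.input.q === newPayload.q); // true
```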

components/scrapeless/actions/universal-scraping-api/universal-scraping-api.mjs

Lines changed: 4 additions & 8 deletions

@@ -31,14 +31,10 @@ export default {
     if (apiServer === "webUnlocker") {
       const submitData = {
         actor: "unlocker.webunlocker",
-        input: {
-          url: rest.url,
-          jsRender: rest.jsRender,
-          headless: rest.headless,
-        },
-        proxy: {
-          country: rest.country,
-        },
+        country: rest.country,
+        url: rest.url,
+        jsRender: rest.jsRender,
+        headless: rest.headless,
       };
       const response = await this.scrapeless.universalScrapingApi({
         $,
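
The web unlocker payload gets the same treatment: both the `input` fields and the `proxy.country` setting collapse into top-level keys. A minimal sketch, with placeholder values:

```javascript
// Hypothetical 0.2.0 web-unlocker payload; keys come from the diff,
// values are placeholders.
const submitData = {
  actor: "unlocker.webunlocker",
  country: "US",               // previously proxy.country
  url: "https://example.com",  // previously input.url
  jsRender: true,              // previously input.jsRender
  headless: true,              // previously input.headless
};

console.log(Object.keys(submitData));
```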

components/scrapeless/package.json

Lines changed: 1 addition & 1 deletion

@@ -1,6 +1,6 @@
 {
   "name": "@pipedream/scrapeless",
-  "version": "0.1.1",
+  "version": "0.2.0",
   "description": "Pipedream Scrapeless Components",
   "main": "scrapeless.app.mjs",
   "keywords": [
components/scrapeless/scrapeless.app.mjs

Lines changed: 9 additions & 154 deletions

@@ -1,18 +1,15 @@
 import { axios } from "@pipedream/platform";
-import {
-  isObject, log, isNullOrUnDef,
-} from "./common/utils.mjs";
 
 export default {
   type: "app",
   app: "scrapeless",
   methods: {
     _baseUrl() {
-      return "https://api.scrapeless.com/api/v1";
+      return "https://scrapeless-nodes.norains.com/api/v1";
     },
     _headers() {
       return {
-        "x-api-token": `${this.$auth.api_key}`,
+        "x-api-key": `${this.$auth.api_key}`,
       };
     },
     _makeRequest({

@@ -27,81 +24,40 @@ export default {
     submitScrapeJob(opts = {}) {
       return this._makeRequest({
         method: "POST",
-        path: "/scraper/request",
+        path: "/nodes/scraper/request",
         ...opts,
       });
     },
     getScrapeResult({ scrapeJobId }) {
       return this._makeRequest({
-        path: `/scraper/result/${scrapeJobId}`,
+        path: `/nodes/scraper/result/${scrapeJobId}`,
       });
     },
     async scrapingApi({ submitData }) {
-      const path = "/scraper/request";
-      const requestWithSync = {
-        ...submitData,
-        async: true,
-      };
+      const path = "/nodes/deepserp";
       const res = await this._makeRequest({
         method: "POST",
         path,
-        data: requestWithSync,
+        data: submitData,
       });
 
-      if (res.data) {
-        return res.data;
-      }
-
-      if (res?.taskId) {
-        log("Waiting for scrape result...");
-
-        while (true) {
-          await new Promise((resolve) => setTimeout(resolve, 1000));
-          const result = await this.getScrapeResult({
-            scrapeJobId: res.taskId,
-          });
-
-          if (isObject(result) && Object.keys(result).length > 0) {
-            log("Scrape result received");
-            return result;
-          }
-
-          if (isNullOrUnDef(result)) {
-            log("Scrape result is undefined");
-            return result;
-          }
-        }
-      }
       return res;
     },
     async universalScrapingApi({ submitData }) {
-      const path = "/unlocker/request";
+      const path = "/nodes/universal-scraping/unlocker";
       const res = await this._makeRequest({
         method: "POST",
         path,
         data: submitData,
       });
-
-      if (res.data) {
-        return res.data;
-      }
-
       return res;
     },
     async crawlerCrawl({ submitData }) {
-      const path = "/crawler/crawl";
-
-      const browserOptions = {
-        "proxy_country": "ANY",
-        "session_name": "Crawl",
-        "session_recording": true,
-        "session_ttl": 900,
-      };
+      const path = "/nodes/crawler/crawl";
 
       const data = {
         url: submitData.url,
         limit: submitData.limit,
-        browserOptions: browserOptions,
       };
 
       const res = await this._makeRequest({

@@ -110,74 +66,13 @@ export default {
         data,
       });
 
-      // get job id
-      if (res.id) {
-        log("Crawl job started");
-        return this.monitorJobStatus(res.id);
-      }
-
       return res;
     },
-    /**
-     * Monitor the status of a crawl job.
-     * @param {string} jobId - The ID of the crawl job.
-     * @param {number} [pollInterval=2] - The interval in seconds to poll for job status.
-     * @returns {Promise<Object>} - The status response of the crawl job.
-     */
-    async monitorJobStatus(jobId, pollInterval = 2) {
-      try {
-        while (true) {
-          let statusResponse = await this._makeRequest({
-            method: "GET",
-            path: `/crawler/crawl/${jobId}`,
-          });
-          log("Crawl job status: ", statusResponse.status);
-          if (statusResponse.status === "completed") {
-            if ("data" in statusResponse) {
-              let data = statusResponse.data;
-              while (typeof statusResponse === "object" && "next" in statusResponse) {
-                if (data.length === 0) break;
-                statusResponse = await this._makeRequest({
-                  method: "GET",
-                  path: statusResponse.next,
-                });
-                data = data.concat(statusResponse.data);
-              }
-              statusResponse.data = data;
-              return statusResponse;
-            } else {
-              throw new Error("Crawl job completed but no data was returned");
-            }
-          } else if ([
-            "active",
-            "paused",
-            "pending",
-            "queued",
-            "waiting",
-            "scraping",
-          ].includes(statusResponse.status)) {
-            pollInterval = Math.max(pollInterval, 2);
-            await new Promise((resolve) => setTimeout(resolve, pollInterval * 1000));
-          } else {
-            throw new Error(`Crawl job failed or was stopped. Status: ${statusResponse.status}`);
-          }
-        }
-      } catch (error) {
-        throw new Error(error.message);
-      }
-    },
     async crawlerScrape({ submitData }) {
-      const path = "/crawler/scrape";
-      const browserOptions = {
-        "proxy_country": "ANY",
-        "session_name": "Scrape",
-        "session_recording": true,
-        "session_ttl": 900,
-      };
+      const path = "/nodes/crawler/scrape";
 
       const data = {
         url: submitData.url,
-        browserOptions: browserOptions,
       };
 
       try {

@@ -186,51 +81,11 @@ export default {
           path,
           data,
         });
-
-        if (!response.id) {
-          throw new Error("Failed to start a scrape job");
-        }
-
-        log("Scrape job started");
-
-        let pollInterval = 2;
-
-        while (true) {
-          const statusResponse = await this.checkScrapeStatus(response.id);
-          log("Scrape job status: ", statusResponse.status);
-          if (statusResponse.status !== "scraping") {
-            return statusResponse;
-          }
-
-          pollInterval = Math.max(pollInterval, 2);
-          await new Promise((resolve) => setTimeout(resolve, pollInterval * 1000));
-        }
-      } catch (error) {
-        throw new Error(error.message);
-      }
-    },
-
-    /**
-     * Check the status of a crawl job.
-     * @param {string} id - The ID of the crawl job.
-     * @returns {Promise<Object>} - The status response of the crawl job.
-     */
-    async checkScrapeStatus(id) {
-      if (!id) {
-        throw new Error("No scrape ID provided");
-      }
-      const url = `/crawler/scrape/${id}`;
-      try {
-        const response = await this._makeRequest({
-          method: "GET",
-          path: url,
-        });
         return response;
       } catch (error) {
         throw new Error(error.message);
       }
     },
-
   },
 
 };
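
Net effect of the app-file changes: every path is re-rooted under `/nodes/...` on the new base URL, auth moves from `x-api-token` to `x-api-key`, and the client-side polling (`monitorJobStatus`, `checkScrapeStatus`, the `taskId` loop) is removed, so each method is now a single request whose response is returned as-is. A minimal sketch of that flow outside Pipedream, assuming Node 18+ for built-in `fetch`; the endpoint and header name come from the diff, while the API key and payload values are placeholders.

```javascript
// Minimal reproduction of the 0.2.0 scrapingApi flow: one POST, no polling.
// Assumes Node 18+ (global fetch). Base URL, path, and header name are taken
// from the diff above; the API key and payload values are placeholders.
const BASE_URL = "https://scrapeless-nodes.norains.com/api/v1";

async function scrapingApi(submitData) {
  const res = await fetch(`${BASE_URL}/nodes/deepserp`, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "x-api-key": process.env.SCRAPELESS_API_KEY ?? "<your-api-key>",
    },
    body: JSON.stringify(submitData),
  });
  // 0.2.0 returns the response body directly; the taskId polling loop from
  // 0.1.x is gone, so the nodes server presumably responds synchronously.
  return res.json();
}

scrapingApi({
  actor: "scraper.google.search",
  q: "pipedream",
  hl: "en",
  gl: "us",
}).then(console.log);
```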
