Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import zenscrape from "../../zenscrape.app.mjs";

/**
 * Pipedream action: fetch the credit status of the connected Zenscrape account.
 *
 * The action takes no user input besides the app connection; it calls the
 * app's `getStatus` method and returns its result unchanged.
 */
export default {
  key: "zenscrape-get-credit-status",
  name: "Get Credit Status",
  description: "Retrieve the number of remaining credits in Zenscrape. [See the documentation](https://app.zenscrape.com/documentation)",
  version: "0.0.1",
  type: "action",
  props: {
    // Connected Zenscrape app; provides `getStatus`.
    zenscrape,
  },
  /**
   * @param {object} ctx
   * @param {object} ctx.$ - Step context; forwarded to the request and used to export the summary.
   * @returns {Promise<*>} The value resolved by `zenscrape.getStatus`.
   */
  async run({ $ }) {
    const requestArgs = {
      $,
    };
    const creditStatus = await this.zenscrape.getStatus(requestArgs);

    $.export("$summary", "Successfully retrieved credit status.");

    return creditStatus;
  },
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import zenscrape from "../../zenscrape.app.mjs";

/**
 * Pipedream action: scrape a URL through Zenscrape.
 *
 * All user-facing props reuse the definitions declared on the Zenscrape app
 * and are forwarded as request params to the app's `getContent` method.
 */
export default {
  key: "zenscrape-get-website-content",
  name: "Get Website Content",
  description: "Retrieve the content of a website. [See the documentation](https://app.zenscrape.com/documentation)",
  version: "0.0.1",
  type: "action",
  props: {
    zenscrape,
    url: {
      propDefinition: [
        zenscrape,
        "url",
      ],
    },
    premium: {
      propDefinition: [
        zenscrape,
        "premium",
      ],
    },
    location: {
      propDefinition: [
        zenscrape,
        "location",
      ],
    },
    keepHeaders: {
      propDefinition: [
        zenscrape,
        "keepHeaders",
      ],
    },
    render: {
      propDefinition: [
        zenscrape,
        "render",
      ],
    },
  },
  /**
   * @param {object} ctx
   * @param {object} ctx.$ - Step context; forwarded to the request and used to export the summary.
   * @returns {Promise<*>} The value resolved by `zenscrape.getContent`.
   */
  async run({ $ }) {
    const response = await this.zenscrape.getContent({
      $,
      params: {
        url: this.url,
        premium: this.premium,
        location: this.location,
        // The request param is snake_case while the prop is camelCase.
        keep_headers: this.keepHeaders,
        render: this.render,
      },
    });
    // Keep the sentence-ending period outside the inline-code span so the
    // summary shows the exact URL rather than "<url>.".
    $.export("$summary", `Successfully scraped website \`${this.url}\`.`);
    return response;
  },
};
8 changes: 6 additions & 2 deletions components/zenscrape/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@pipedream/zenscrape",
"version": "0.0.1",
"version": "0.1.0",
"description": "Pipedream Zenscrape Components",
"main": "zenscrape.app.mjs",
"keywords": [
Expand All @@ -11,5 +11,9 @@
"author": "Pipedream <[email protected]> (https://pipedream.com/)",
"publishConfig": {
"access": "public"
},
"dependencies": {
"@pipedream/platform": "^3.0.3",
"md5": "^2.3.0"
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
import zenscrape from "../../zenscrape.app.mjs";
import { DEFAULT_POLLING_SOURCE_TIMER_INTERVAL } from "@pipedream/platform";
import md5 from "md5";

export default {
key: "zenscrape-website-content-updated",
name: "Website Content Updated",

Check warning on line 7 in components/zenscrape/sources/website-content-updated/website-content-updated.mjs

View workflow job for this annotation

GitHub Actions / Lint Code Base

Source names should start with "New". See https://pipedream.com/docs/components/guidelines/#source-name
description: "Emit new event when the content of a URL has updated. [See the documentation](https://app.zenscrape.com/documentation)",
version: "0.0.1",
type: "source",
dedupe: "unique",
props: {
zenscrape,
db: "$.service.db",
timer: {
type: "$.interface.timer",
default: {
intervalSeconds: DEFAULT_POLLING_SOURCE_TIMER_INTERVAL,
},
},
url: {
propDefinition: [
zenscrape,
"url",
],
},
premium: {
propDefinition: [
zenscrape,
"premium",
],
},
location: {
propDefinition: [
zenscrape,
"location",
],
},
keepHeaders: {
propDefinition: [
zenscrape,
"keepHeaders",
],
},
render: {
propDefinition: [
zenscrape,
"render",
],
},
},
methods: {
_getContentHash() {
return this.db.get("contentHash");
},
_setContentHash(contentHash) {
this.db.set("contentHash", contentHash);
},
generateMeta() {
const ts = Date.now();
return {
id: ts,
summary: "Website Content Updated",
ts,
};
},
},
async run() {
const contentHash = this._getContentHash();

const content = await this.zenscrape.getContent({
params: {
url: this.url,
premium: this.premium,
location: this.location,
keep_headers: this.keepHeaders,
render: this.render,
},
});

const newContentHash = md5(JSON.stringify(content));

if (newContentHash === contentHash) {
return;
}

this._setContentHash(newContentHash);

const meta = this.generateMeta();
this.$emit(content, meta);
},
};
64 changes: 60 additions & 4 deletions components/zenscrape/zenscrape.app.mjs
Original file line number Diff line number Diff line change
@@ -1,11 +1,67 @@
import { axios } from "@pipedream/platform";

export default {
type: "app",
app: "zenscrape",
propDefinitions: {},
propDefinitions: {
url: {
type: "string",
label: "URL",
description: "The target site you want to scrape",
},
premium: {
type: "boolean",
label: "Premium",
description: "Uses residential proxies, unlocks sites that are hard to scrape. Counts as 20 credits towards your quota.",
optional: true,
},
location: {
type: "string",
label: "Location",
description: "If premium=`false` possible locations are 'na' (North America) and 'eu' (Europe). If premium=`true` you can choose a location from Zenscrape's [list of 230+ countries](https://app.zenscrape.com/documentation#proxyLocationList)",
optional: true,
},
keepHeaders: {
type: "boolean",
label: "Keep Headers",
description: "Allow to pass through forward headers (e.g. user agents, cookies)",
optional: true,
},
render: {
type: "boolean",
label: "Render",
description: "Use a headless browser to fetch content that relies on javascript. Counts as 5 credits towards your quota.",
optional: true,
},
},
methods: {
// this.$auth contains connected account data
authKeys() {
console.log(Object.keys(this.$auth));
_baseUrl() {
return "https://app.zenscrape.com/api/v1";
},
_makeRequest({
$ = this,
path,
...opts
}) {
return axios($, {
url: `${this._baseUrl()}${path}`,
headers: {
apikey: this.$auth.api_key,
},
...opts,
});
},
getContent(opts = {}) {
return this._makeRequest({
path: "/get",
...opts,
});
},
getStatus(opts = {}) {
return this._makeRequest({
path: "/status",
...opts,
});
},
},
};
17 changes: 12 additions & 5 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading