Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import FormData from "form-data";
import docparser from "../../docparser.app.mjs";

/**
 * Action: hand Docparser a URL to fetch and import into the selected parser.
 *
 * The URL is sent as a multipart form field named `url`; the multipart
 * headers produced by the form are forwarded with the request.
 */
export default {
  key: "docparser-fetch-document-url",
  name: "Fetch Document by URL",
  description: "Fetches a document from a provided URL and imports it to Docparser for parsing. [See the documentation](https://docparser.com/api/)",
  version: "0.0.1",
  type: "action",
  props: {
    docparser,
    parserId: {
      propDefinition: [
        docparser,
        "parserId",
      ],
    },
    url: {
      type: "string",
      label: "Document URL",
      description: "The URL of the document to be fetched and imported into Docparser.",
    },
  },
  /**
   * Builds the form payload, submits it and exports a summary line.
   * @returns the response from the app's `fetchDocumentFromURL` call, unchanged
   */
  async run({ $ }) {
    const form = new FormData();
    form.append("url", this.url);

    const request = {
      $,
      parserId: this.parserId,
      data: form,
      // Multipart boundary/content-type headers come from the form itself.
      headers: form.getHeaders(),
    };
    const response = await this.docparser.fetchDocumentFromURL(request);

    const summary = `Document is scheduled to be fetched and processed. Document ID: ${response.document_id}`;
    $.export("$summary", summary);
    return response;
  },
};
40 changes: 40 additions & 0 deletions components/docparser/actions/upload-document/upload-document.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import FormData from "form-data";
import fs from "fs";
import { checkTmp } from "../../common/utils.mjs";
import docparser from "../../docparser.app.mjs";

/**
 * Action: upload a local file to the selected Docparser parser.
 *
 * The file is streamed from disk as a multipart form field named `file`;
 * the path is first passed through `checkTmp`.
 */
export default {
  key: "docparser-upload-document",
  name: "Upload Document",
  description: "Uploads a document to docparser that initiates parsing immediately after reception. [See the documentation](https://docparser.com/api/#import-documents)",
  version: "0.0.1",
  type: "action",
  props: {
    docparser,
    parserId: {
      propDefinition: [
        docparser,
        "parserId",
      ],
    },
    file: {
      type: "string",
      label: "File",
      description: "The path to a file in the `/tmp` directory. [See the documentation on working with files](https://pipedream.com/docs/code/nodejs/working-with-files/#writing-a-file-to-tmp)",
    },
  },
  /**
   * Streams the file into a multipart form, submits it and exports a summary.
   * @returns the response from the app's `uploadDocument` call, unchanged
   */
  async run({ $ }) {
    const filePath = checkTmp(this.file);
    const form = new FormData();
    form.append("file", fs.createReadStream(filePath));

    const request = {
      $,
      parserId: this.parserId,
      data: form,
      // Multipart boundary/content-type headers come from the form itself.
      headers: form.getHeaders(),
    };
    const response = await this.docparser.uploadDocument(request);

    const summary = `Successfully uploaded document. Document ID: ${response.id}`;
    $.export("$summary", summary);
    return response;
  },
};
6 changes: 6 additions & 0 deletions components/docparser/common/utils.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
/**
 * Resolve a user-supplied file name to a path under `/tmp`.
 *
 * A value that is exactly `/tmp` or starts with `/tmp/` is returned
 * untouched. Anything else is placed under `/tmp/`, with leading slashes
 * removed first so the result never contains `//`.
 *
 * The prefix test includes the path separator on purpose: a bare
 * `startsWith("/tmp")` would also accept siblings such as `/tmpfile.pdf`
 * or `/tmp-other/x`, which are not inside `/tmp`.
 *
 * @param {string} filename - File name or path provided by the user.
 * @returns {string} A path located under `/tmp`.
 */
export const checkTmp = (filename) => {
  if (filename === "/tmp" || filename.startsWith("/tmp/")) {
    return filename;
  }
  return `/tmp/${filename.replace(/^\/+/, "")}`;
};
70 changes: 66 additions & 4 deletions components/docparser/docparser.app.mjs
Original file line number Diff line number Diff line change
@@ -1,11 +1,73 @@
import { axios } from "@pipedream/platform";

export default {
type: "app",
app: "docparser",
propDefinitions: {},
propDefinitions: {
parserId: {
type: "string",
label: "Parser ID",
description: "The ID of the parser to be used.",
async options() {
const parsers = await this.listParsers();
return parsers.map(({
id: value, label,
}) => ({
label,
value,
}));
},
},
},
methods: {
// this.$auth contains connected account data
authKeys() {
console.log(Object.keys(this.$auth));
_baseUrl() {
return "https://api.docparser.com";
},
_auth() {
return {
username: `${this.$auth.api_key}`,
password: "",
};
},
_makeRequest({
$ = this, path, ...opts
}) {
return axios($, {
url: this._baseUrl() + path,
auth: this._auth(),
...opts,
});
},
listData({
parserId, ...opts
}) {
return this._makeRequest({
path: `/v1/results/${parserId}`,
...opts,
});
},
listParsers() {
return this._makeRequest({
path: "/v1/parsers",
});
},
fetchDocumentFromURL({
parserId, ...opts
}) {
return this._makeRequest({
method: "POST",
path: `/v2/document/fetch/${parserId}`,
...opts,
});
},
uploadDocument({
parserId, ...opts
}) {
return this._makeRequest({
method: "POST",
path: `/v1/document/upload/${parserId}`,
...opts,
});
},
},
};
4 changes: 2 additions & 2 deletions components/docparser/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@pipedream/docparser",
"version": "0.6.0",
"version": "0.1.0",
"description": "Pipedream docparser Components",
"main": "docparser.app.mjs",
"keywords": [
Expand All @@ -13,6 +13,6 @@
"access": "public"
},
"dependencies": {
"@pipedream/platform": "^3.0.0"
"@pipedream/platform": "^3.0.3"
}
}
69 changes: 69 additions & 0 deletions components/docparser/sources/common/base.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import { DEFAULT_POLLING_SOURCE_TIMER_INTERVAL } from "@pipedream/platform";
import app from "../../docparser.app.mjs";

/**
 * Shared base for timer-driven Docparser sources.
 *
 * A component that spreads this object must supply two methods that
 * `emitEvent` calls but this file does not define:
 *   - getFunction(): returns the function used to fetch results
 *   - getSummary(item): returns the summary string for an emitted item
 */
export default {
  props: {
    app,
    db: "$.service.db",
    timer: {
      type: "$.interface.timer",
      default: {
        intervalSeconds: DEFAULT_POLLING_SOURCE_TIMER_INTERVAL,
      },
    },
    parserId: {
      propDefinition: [
        app,
        "parserId",
      ],
    },
  },
  methods: {
    // Stored cursor, or the epoch start when nothing has been saved yet.
    _getLastDate() {
      return this.db.get("lastDate") || "1970-01-01T00:00:00";
    },
    _setLastDate(lastDate) {
      this.db.set("lastDate", lastDate);
    },
    /**
     * Fetch results newer than the stored cursor, advance the cursor and
     * emit one event per result.
     *
     * @param {number|false} maxResults - When truthy, sent as the `limit`
     *   request parameter; otherwise no limit is sent.
     */
    async emitEvent(maxResults = false) {
      const lastDate = this._getLastDate();
      const fn = this.getFunction();
      const params = {
        sort_by: "parsed_at",
        sort_order: "DESC",
        list: "processed_after",
        date: lastDate,
      };

      if (maxResults) {
        params.limit = maxResults;
      }

      const response = await fn({
        parserId: this.parserId,
        params,
      });

      if (response.length) {
        // Results are requested in descending order, so the first entry is
        // taken as the newest. The last 6 characters are dropped before
        // storing (e.g. a trailing "+00:00" offset), which matches the
        // offset-less format of the default cursor.
        const dateTime = response[0].processed_at_utc;
        this._setLastDate(dateTime.substring(0, dateTime.length - 6));
      }

      // reverse() turns the descending list into oldest-first emission order.
      for (const item of response.reverse()) {
        // Prefer a timestamp carried by the item itself. `created` is kept
        // first for backward compatibility; the processed timestamps cover
        // payloads that have no `created` field. Only fall back to the
        // current time when nothing parseable is present.
        const stamp = item.created || item.processed_at_utc || item.processed_at;
        const parsedTs = stamp
          ? Date.parse(stamp)
          : NaN;

        this.$emit(item, {
          id: item.id,
          summary: this.getSummary(item),
          ts: Number.isNaN(parsedTs)
            ? Date.now()
            : parsedTs,
        });
      }
    },
  },
  hooks: {
    // On deploy, emit with a limit of 25 results.
    async deploy() {
      await this.emitEvent(25);
    },
  },
  // Timer tick: emit everything newer than the stored cursor.
  async run() {
    await this.emitEvent();
  },
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import common from "../common/base.mjs";
import sampleEmit from "./test-event.mjs";

/**
 * Source built on the shared polling base: supplies the fetch function and
 * the summary text that the base's emit logic asks for.
 */
export default {
  ...common,
  key: "docparser-new-document-data-available",
  name: "New Document Data Available",
  description: "Emit new event every time a document is processed and parsed data is available. [See the documentation](https://docparser.com/api/)",
  version: "0.0.1",
  type: "source",
  dedupe: "unique",
  methods: {
    ...common.methods,
    // Function the base calls to fetch results.
    getFunction() {
      const { listData } = this.app;
      return listData;
    },
    // Summary line shown for each emitted event.
    getSummary({ file_name: fileName }) {
      return `New Document Parsed: ${fileName}`;
    },
  },
  sampleEmit,
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// Static sample payload exported as this module's default.
// Presumably the `sampleEmit` fixture for a source component — confirm
// against the importing file. Timestamps are ISO-8601 strings that carry an
// explicit "+00:00" offset.
export default {
"id": "2b11659f161dcd26694e9403fc430cfd",
"document_id": "2b11659f161dcd26694e9403fc430cfd",
"remote_id": "",
"file_name": "file.pdf",
"media_link": "https://api.docparser.com/v1/document/media/2b11659f161dcd26694e9403fc430cfd-2b11659f161dcd26694e9403fc430cfd",
"media_link_original": "https://api.docparser.com/v1/document/media/2b11659f161dcd26694e9403fc430cfd-2b11659f161dcd26694e9403fc430cfd/original",
"media_link_data": "https://api.docparser.com/v1/document/media/2b11659f161dcd26694e9403fc430cfd-2b11659f161dcd26694e9403fc430cfd/data",
"page_count": 5,
"uploaded_at": "2025-04-08T13:32:02+00:00",
"processed_at": "2025-04-08T13:32:02+00:00",
"uploaded_at_utc": "2025-04-08T13:32:02+00:00",
"uploaded_at_user": "2025-04-08T06:32:02+00:00",
"processed_at_utc": "2025-04-08T13:32:02+00:00",
"processed_at_user": "2025-04-08T06:32:02+00:00"
}
Loading
Loading