Skip to content

Commit 2f3a00e

Browse files
authored
New Components - docparser (#16191)
* docparser init
* [Components] docparser #13255
  Sources
  - New Document Data Available
  Actions
  - Fetch Document URL
  - Upload Document
* pnpm update
* remove node_modules
1 parent 1671ff0 commit 2f3a00e

File tree

9 files changed

+260
-7
lines changed

9 files changed

+260
-7
lines changed
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
import FormData from "form-data";
2+
import docparser from "../../docparser.app.mjs";
3+
4+
/**
 * Action: asks Docparser to fetch a document from a provided URL and import
 * it into the selected parser.
 *
 * The URL is sent as a multipart form field named `url`; the multipart
 * headers generated by the form are forwarded with the request.
 */
export default {
  key: "docparser-fetch-document-url",
  name: "Fetch Document by URL",
  description: "Fetches a document from a provided URL and imports it to Docparser for parsing. [See the documentation](https://docparser.com/api/)",
  version: "0.0.1",
  type: "action",
  props: {
    docparser,
    parserId: {
      propDefinition: [
        docparser,
        "parserId",
      ],
    },
    url: {
      type: "string",
      label: "Document URL",
      description: "The URL of the document to be fetched and imported into Docparser.",
    },
  },
  /**
   * Submits the URL to the parser and exports a summary.
   *
   * @param {object} ctx - Run context; `ctx.$` is used for the request and the summary export.
   * @returns {Promise<object>} The response returned by `fetchDocumentFromURL`.
   */
  async run({ $ }) {
    // Build the multipart payload carrying the document URL.
    const form = new FormData();
    form.append("url", this.url);

    const requestArgs = {
      $,
      parserId: this.parserId,
      data: form,
      headers: form.getHeaders(),
    };
    const result = await this.docparser.fetchDocumentFromURL(requestArgs);

    $.export("$summary", `Document is scheduled to be fetched and processed. Document ID: ${result.document_id}`);
    return result;
  },
};
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
import FormData from "form-data";
2+
import fs from "fs";
3+
import { checkTmp } from "../../common/utils.mjs";
4+
import docparser from "../../docparser.app.mjs";
5+
6+
/**
 * Action: uploads a local file to the selected Docparser parser.
 *
 * The file path is resolved with `checkTmp`, streamed from disk and sent as
 * a multipart form field named `file`.
 */
export default {
  key: "docparser-upload-document",
  name: "Upload Document",
  description: "Uploads a document to docparser that initiates parsing immediately after reception. [See the documentation](https://docparser.com/api/#import-documents)",
  version: "0.0.1",
  type: "action",
  props: {
    docparser,
    parserId: {
      propDefinition: [
        docparser,
        "parserId",
      ],
    },
    file: {
      type: "string",
      label: "File",
      description: "The path to a file in the `/tmp` directory. [See the documentation on working with files](https://pipedream.com/docs/code/nodejs/working-with-files/#writing-a-file-to-tmp)",
    },
  },
  /**
   * Streams the file to Docparser and exports a summary.
   *
   * @param {object} ctx - Run context; `ctx.$` is used for the request and the summary export.
   * @returns {Promise<object>} The response returned by `uploadDocument`.
   */
  async run({ $ }) {
    // Resolve the user-supplied path, then stream the file into the form.
    const filePath = checkTmp(this.file);
    const fileStream = fs.createReadStream(filePath);

    const form = new FormData();
    form.append("file", fileStream);

    const requestArgs = {
      $,
      parserId: this.parserId,
      data: form,
      headers: form.getHeaders(),
    };
    const result = await this.docparser.uploadDocument(requestArgs);

    $.export("$summary", `Successfully uploaded document. Document ID: ${result.id}`);
    return result;
  },
};
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
/**
 * Ensures a file path points inside the `/tmp` directory.
 *
 * A path is treated as already being in `/tmp` only when it is exactly
 * `/tmp` or starts with `/tmp/`. A bare `startsWith("/tmp")` check would also
 * accept sibling paths such as `/tmp-other/file` or `/tmpfile`, which are not
 * inside `/tmp`; those are now prefixed like any other path.
 *
 * Note: `..` segments are not normalized by this helper.
 *
 * @param {string} filename - File name or path supplied by the user.
 * @returns {string} The unchanged path when it is already under `/tmp`,
 *   otherwise the path prefixed with `/tmp/`.
 */
export const checkTmp = (filename) => {
  const isInTmp = filename === "/tmp" || filename.startsWith("/tmp/");
  return isInTmp
    ? filename
    : `/tmp/${filename}`;
};
Lines changed: 66 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,73 @@
1+
import { axios } from "@pipedream/platform";
2+
13
export default {
24
type: "app",
35
app: "docparser",
4-
propDefinitions: {},
6+
propDefinitions: {
7+
parserId: {
8+
type: "string",
9+
label: "Parser ID",
10+
description: "The ID of the parser to be used.",
11+
async options() {
12+
const parsers = await this.listParsers();
13+
return parsers.map(({
14+
id: value, label,
15+
}) => ({
16+
label,
17+
value,
18+
}));
19+
},
20+
},
21+
},
522
methods: {
6-
// this.$auth contains connected account data
7-
authKeys() {
8-
console.log(Object.keys(this.$auth));
23+
_baseUrl() {
24+
return "https://api.docparser.com";
25+
},
26+
_auth() {
27+
return {
28+
username: `${this.$auth.api_key}`,
29+
password: "",
30+
};
31+
},
32+
_makeRequest({
33+
$ = this, path, ...opts
34+
}) {
35+
return axios($, {
36+
url: this._baseUrl() + path,
37+
auth: this._auth(),
38+
...opts,
39+
});
40+
},
41+
listData({
42+
parserId, ...opts
43+
}) {
44+
return this._makeRequest({
45+
path: `/v1/results/${parserId}`,
46+
...opts,
47+
});
48+
},
49+
listParsers() {
50+
return this._makeRequest({
51+
path: "/v1/parsers",
52+
});
53+
},
54+
fetchDocumentFromURL({
55+
parserId, ...opts
56+
}) {
57+
return this._makeRequest({
58+
method: "POST",
59+
path: `/v2/document/fetch/${parserId}`,
60+
...opts,
61+
});
62+
},
63+
uploadDocument({
64+
parserId, ...opts
65+
}) {
66+
return this._makeRequest({
67+
method: "POST",
68+
path: `/v1/document/upload/${parserId}`,
69+
...opts,
70+
});
971
},
1072
},
1173
};

components/docparser/package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@pipedream/docparser",
3-
"version": "0.6.0",
3+
"version": "0.1.0",
44
"description": "Pipedream docparser Components",
55
"main": "docparser.app.mjs",
66
"keywords": [
@@ -13,6 +13,6 @@
1313
"access": "public"
1414
},
1515
"dependencies": {
16-
"@pipedream/platform": "^3.0.0"
16+
"@pipedream/platform": "^3.0.3"
1717
}
1818
}
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
import { DEFAULT_POLLING_SOURCE_TIMER_INTERVAL } from "@pipedream/platform";
2+
import app from "../../docparser.app.mjs";
3+
4+
/**
 * Shared base for Docparser polling sources.
 *
 * A component that spreads this base must provide two methods, which
 * `emitEvent` calls but this file does not define:
 *   - `getFunction()`: returns the function used to list items.
 *   - `getSummary(item)`: returns the summary string for an emitted item.
 */
export default {
  props: {
    app,
    db: "$.service.db",
    timer: {
      type: "$.interface.timer",
      default: {
        intervalSeconds: DEFAULT_POLLING_SOURCE_TIMER_INTERVAL,
      },
    },
    parserId: {
      propDefinition: [
        app,
        "parserId",
      ],
    },
  },
  methods: {
    /**
     * Reads the stored checkpoint.
     *
     * @returns {string} The saved `lastDate`, or `"1970-01-01T00:00:00"` when none is stored.
     */
    _getLastDate() {
      return this.db.get("lastDate") || "1970-01-01T00:00:00";
    },
    /**
     * Stores the checkpoint.
     *
     * @param {string} lastDate - Value to save under the `lastDate` key.
     */
    _setLastDate(lastDate) {
      this.db.set("lastDate", lastDate);
    },
    /**
     * Lists items processed after the stored checkpoint, advances the
     * checkpoint, and emits the items in reverse of the returned order.
     *
     * @param {number|false} [maxResults=false] - When truthy, sent as the `limit` param.
     * @returns {Promise<void>}
     */
    async emitEvent(maxResults = false) {
      const lastDate = this._getLastDate();
      const fn = this.getFunction();
      const params = {
        sort_by: "parsed_at",
        sort_order: "DESC",
        list: "processed_after",
        date: lastDate,
      };

      if (maxResults) {
        params.limit = maxResults;
      }

      const response = await fn({
        parserId: this.parserId,
        params,
      });

      if (response.length) {
        // The first item is taken as the newest (descending sort requested).
        // The last 6 characters are dropped before storing; in the sample
        // event that is the UTC offset suffix, e.g. "+00:00".
        const dateTime = response[0].processed_at_utc;
        this._setLastDate(dateTime.substring(0, dateTime.length - 6));
      }

      for (const item of response.reverse()) {
        this.$emit(item, {
          id: item.id,
          summary: this.getSummary(item),
          // Use the item's processing time, the same field the checkpoint is
          // based on. The previous `item.created` is not present in the
          // sample result, so every event was stamped with the poll time.
          // Fall back to the current time if the value is missing or unparseable.
          ts: Date.parse(item.processed_at_utc) || Date.now(),
        });
      }
    },
  },
  hooks: {
    /** On deploy, emit at most 25 items. */
    async deploy() {
      await this.emitEvent(25);
    },
  },
  /** Timer-driven run: emit everything processed since the checkpoint. */
  async run() {
    await this.emitEvent();
  },
};
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import common from "../common/base.mjs";
2+
import sampleEmit from "./test-event.mjs";
3+
4+
/**
 * Source: emits an event for each document whose parsed data is available.
 *
 * Built on the shared polling base (`common`); this file only supplies the
 * listing function and the event summary.
 */
export default {
  ...common,
  key: "docparser-new-document-data-available",
  name: "New Document Data Available",
  description: "Emit new event every time a document is processed and parsed data is available. [See the documentation](https://docparser.com/api/)",
  version: "0.0.1",
  type: "source",
  dedupe: "unique",
  methods: {
    ...common.methods,
    /**
     * @returns {Function} The app's `listData` method, used to list parsed results.
     */
    getFunction() {
      const { listData } = this.app;
      return listData;
    },
    /**
     * @param {object} item - A parsed-document result.
     * @returns {string} Summary containing the item's `file_name`.
     */
    getSummary(item) {
      const { file_name: fileName } = item;
      return `New Document Parsed: ${fileName}`;
    },
  },
  sampleEmit,
};
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
// Sample event payload for the "New Document Data Available" source.
export default {
  id: "2b11659f161dcd26694e9403fc430cfd",
  document_id: "2b11659f161dcd26694e9403fc430cfd",
  remote_id: "",
  file_name: "file.pdf",
  media_link: "https://api.docparser.com/v1/document/media/2b11659f161dcd26694e9403fc430cfd-2b11659f161dcd26694e9403fc430cfd",
  media_link_original: "https://api.docparser.com/v1/document/media/2b11659f161dcd26694e9403fc430cfd-2b11659f161dcd26694e9403fc430cfd/original",
  media_link_data: "https://api.docparser.com/v1/document/media/2b11659f161dcd26694e9403fc430cfd-2b11659f161dcd26694e9403fc430cfd/data",
  page_count: 5,
  uploaded_at: "2025-04-08T13:32:02+00:00",
  processed_at: "2025-04-08T13:32:02+00:00",
  uploaded_at_utc: "2025-04-08T13:32:02+00:00",
  uploaded_at_user: "2025-04-08T06:32:02+00:00",
  processed_at_utc: "2025-04-08T13:32:02+00:00",
  processed_at_user: "2025-04-08T06:32:02+00:00",
};

pnpm-lock.yaml

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)