Skip to content

Commit ac8140f

Browse files
committed
[Components] docparser #13255
Sources: New Document Data Available. Actions: Fetch Document URL, Upload Document.
1 parent 8a01a44 commit ac8140f

File tree

9 files changed

+162
-197
lines changed

9 files changed

+162
-197
lines changed

components/docparser/actions/fetch-document-url/fetch-document-url.mjs

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,35 @@
1+
import FormData from "form-data";
12
import docparser from "../../docparser.app.mjs";
2-
import { axios } from "@pipedream/platform";
33

44
export default {
55
key: "docparser-fetch-document-url",
66
name: "Fetch Document by URL",
77
description: "Fetches a document from a provided URL and imports it to Docparser for parsing. [See the documentation](https://docparser.com/api/)",
8-
version: "0.0.{{ts}}",
8+
version: "0.0.1",
99
type: "action",
1010
props: {
1111
docparser,
12-
url: {
13-
propDefinition: [
14-
docparser,
15-
"url",
16-
],
17-
},
1812
parserId: {
1913
propDefinition: [
2014
docparser,
2115
"parserId",
2216
],
2317
},
18+
url: {
19+
type: "string",
20+
label: "Document URL",
21+
description: "The URL of the document to be fetched and imported into Docparser.",
22+
},
2423
},
2524
async run({ $ }) {
25+
const data = new FormData();
26+
data.append("url", this.url);
27+
2628
const response = await this.docparser.fetchDocumentFromURL({
29+
$,
2730
parserId: this.parserId,
28-
url: this.url,
31+
data,
32+
headers: data.getHeaders(),
2933
});
3034

3135
$.export("$summary", `Document is scheduled to be fetched and processed. Document ID: ${response.document_id}`);

components/docparser/actions/upload-document/upload-document.mjs

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
1+
import FormData from "form-data";
2+
import fs from "fs";
3+
import { checkTmp } from "../../common/utils.mjs";
14
import docparser from "../../docparser.app.mjs";
2-
import { axios } from "@pipedream/platform";
35

46
export default {
57
key: "docparser-upload-document",
68
name: "Upload Document",
7-
description: "Uploads a document to docparser that initiates parsing immediately after reception. [See the documentation](https://docparser.com/api/)",
8-
version: "0.0.{{ts}}",
9+
description: "Uploads a document to docparser that initiates parsing immediately after reception. [See the documentation](https://docparser.com/api/#import-documents)",
10+
version: "0.0.1",
911
type: "action",
1012
props: {
1113
docparser,
@@ -16,16 +18,20 @@ export default {
1618
],
1719
},
1820
file: {
19-
propDefinition: [
20-
docparser,
21-
"file",
22-
],
21+
type: "string",
22+
label: "File",
23+
description: "The path to a file in the `/tmp` directory. [See the documentation on working with files](https://pipedream.com/docs/code/nodejs/working-with-files/#writing-a-file-to-tmp)",
2324
},
2425
},
2526
async run({ $ }) {
27+
const data = new FormData();
28+
data.append("file", fs.createReadStream(checkTmp(this.file)));
29+
2630
const response = await this.docparser.uploadDocument({
31+
$,
2732
parserId: this.parserId,
28-
file: this.file,
33+
data,
34+
headers: data.getHeaders(),
2935
});
3036

3137
$.export("$summary", `Successfully uploaded document. Document ID: ${response.id}`);
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
/**
 * Resolve a user-supplied file reference to a path inside `/tmp`.
 *
 * A value that is exactly `/tmp` or that already starts with `/tmp/` is
 * returned unchanged. Anything else is treated as relative to `/tmp`.
 *
 * @param {string} filename - File name or path provided by the user.
 * @returns {string} A path located under `/tmp`.
 */
export const checkTmp = (filename) => {
  // Compare against the directory boundary ("/tmp/"), not the bare prefix,
  // so that names such as "/tmpfile.pdf" are not mistaken for /tmp paths.
  if (filename === "/tmp" || filename.startsWith("/tmp/")) {
    return filename;
  }
  // Drop leading slashes so the result never contains "/tmp//".
  return `/tmp/${filename.replace(/^\/+/, "")}`;
};

components/docparser/docparser.app.mjs

Lines changed: 32 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -4,25 +4,17 @@ export default {
44
type: "app",
55
app: "docparser",
66
propDefinitions: {
7-
url: {
8-
type: "string",
9-
label: "Document URL",
10-
description: "The URL of the document to be fetched and imported into Docparser.",
11-
},
12-
file: {
13-
type: "string",
14-
label: "File Content",
15-
description: "The content of the file to be uploaded, encoded in base64.",
16-
},
177
parserId: {
188
type: "string",
199
label: "Parser ID",
2010
description: "The ID of the parser to be used.",
2111
async options() {
2212
const parsers = await this.listParsers();
23-
return parsers.map((parser) => ({
24-
label: parser.name,
25-
value: parser.id,
13+
return parsers.map(({
14+
id: value, label,
15+
}) => ({
16+
label,
17+
value,
2618
}));
2719
},
2820
},
@@ -31,53 +23,51 @@ export default {
3123
_baseUrl() {
3224
return "https://api.docparser.com";
3325
},
34-
async _makeRequest(opts = {}) {
35-
const {
36-
$ = this, method = "GET", path, headers, ...otherOpts
37-
} = opts;
26+
_auth() {
27+
return {
28+
username: `${this.$auth.api_key}`,
29+
password: "",
30+
};
31+
},
32+
_makeRequest({
33+
$ = this, path, ...opts
34+
}) {
3835
return axios($, {
39-
...otherOpts,
40-
method,
4136
url: this._baseUrl() + path,
42-
headers: {
43-
...headers,
44-
Authorization: `Bearer ${this.$auth.api_key}`,
45-
},
37+
auth: this._auth(),
38+
...opts,
4639
});
4740
},
48-
async listParsers() {
41+
listData({
42+
parserId, ...opts
43+
}) {
44+
return this._makeRequest({
45+
path: `/v1/results/${parserId}`,
46+
...opts,
47+
});
48+
},
49+
listParsers() {
4950
return this._makeRequest({
50-
path: "/v2/parsers",
51+
path: "/v1/parsers",
5152
});
5253
},
53-
async fetchDocumentFromURL({
54-
parserId, url,
54+
fetchDocumentFromURL({
55+
parserId, ...opts
5556
}) {
5657
return this._makeRequest({
5758
method: "POST",
5859
path: `/v2/document/fetch/${parserId}`,
59-
data: {
60-
url,
61-
},
60+
...opts,
6261
});
6362
},
64-
async uploadDocument({
65-
parserId, file,
63+
uploadDocument({
64+
parserId, ...opts
6665
}) {
6766
return this._makeRequest({
6867
method: "POST",
6968
path: `/v1/document/upload/${parserId}`,
70-
data: {
71-
file_content: file,
72-
},
69+
...opts,
7370
});
7471
},
75-
async pollParsedData() {
76-
// Implement logic to emit new events when parsed data is available
77-
},
78-
async pollParsedTableRows() {
79-
// Implement logic to emit new events when parsed table rows are available
80-
},
8172
},
82-
version: "0.0.{{{ts}}}", // Ensure the version is set as per requirements
8373
};

components/docparser/package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@pipedream/docparser",
3-
"version": "0.6.0",
3+
"version": "0.1.0",
44
"description": "Pipedream docparser Components",
55
"main": "docparser.app.mjs",
66
"keywords": [
@@ -13,6 +13,6 @@
1313
"access": "public"
1414
},
1515
"dependencies": {
16-
"@pipedream/platform": "^3.0.0"
16+
"@pipedream/platform": "^3.0.3"
1717
}
1818
}
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
import { DEFAULT_POLLING_SOURCE_TIMER_INTERVAL } from "@pipedream/platform";
2+
import app from "../../docparser.app.mjs";
3+
4+
/**
 * Shared base for Docparser polling sources.
 *
 * Concrete sources spread this object and must supply two methods:
 *   - getFunction(): returns the app method used to list results; it is
 *     called with `{ parserId, params }`.
 *   - getSummary(item): returns the summary string for an emitted event.
 */
export default {
  props: {
    app,
    db: "$.service.db",
    timer: {
      type: "$.interface.timer",
      default: {
        intervalSeconds: DEFAULT_POLLING_SOURCE_TIMER_INTERVAL,
      },
    },
    parserId: {
      propDefinition: [
        app,
        "parserId",
      ],
    },
  },
  methods: {
    /**
     * Read the stored checkpoint date.
     * @returns {string} The saved date, or the epoch when none is stored.
     */
    _getLastDate() {
      return this.db.get("lastDate") || "1970-01-01T00:00:00";
    },
    /**
     * Persist the checkpoint date used by the next poll.
     * @param {string} lastDate - Date string to store.
     */
    _setLastDate(lastDate) {
      this.db.set("lastDate", lastDate);
    },
    /**
     * Fetch results processed after the stored checkpoint and emit one event
     * per result, oldest first.
     *
     * @param {number|boolean} [maxResults=false] - When truthy, sent as the
     *   `limit` request parameter.
     * @returns {Promise<void>}
     */
    async emitEvent(maxResults = false) {
      const lastDate = this._getLastDate();
      const fn = this.getFunction();
      const params = {
        sort_by: "parsed_at",
        sort_order: "DESC",
        list: "processed_after",
        date: lastDate,
      };

      if (maxResults) {
        params.limit = maxResults;
      }

      const response = await fn({
        parserId: this.parserId,
        params,
      });

      // Results are requested in descending order, so the first entry is the
      // most recent one. Capture it before building the reversed copy.
      const newest = response[0];

      // Emit oldest-first from a copy so the response itself is not mutated.
      for (const item of [...response].reverse()) {
        this.$emit(item, {
          id: item.id,
          summary: this.getSummary(item),
          // Fall back to the current time when `created` is missing or
          // cannot be parsed (Date.parse returns NaN in both cases).
          ts: Date.parse(item.created) || Date.now(),
        });
      }

      // Advance the checkpoint only after every event was emitted, so a
      // failure part-way through does not skip the remaining results on the
      // next run. NOTE(review): a retry may re-emit items; this presumes the
      // consuming source dedupes by `id` — confirm.
      if (newest) {
        const dateTime = newest.processed_at_utc;
        // Strip the last six characters of the timestamp.
        // NOTE(review): presumably a "+00:00" UTC offset suffix — confirm.
        this._setLastDate(dateTime.substring(0, dateTime.length - 6));
      }
    },
  },
  hooks: {
    /** On deploy, emit up to 25 of the most recent results. */
    async deploy() {
      await this.emitEvent(25);
    },
  },
  /** Timer handler: emit everything processed since the last checkpoint. */
  async run() {
    await this.emitEvent();
  },
};
Lines changed: 10 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -1,84 +1,22 @@
1-
import docparser from "../../docparser.app.mjs";
2-
import { axios } from "@pipedream/platform";
1+
import common from "../common/base.mjs";
2+
import sampleEmit from "./test-event.mjs";
33

44
export default {
5+
...common,
56
key: "docparser-new-document-data-available",
67
name: "New Document Data Available",
78
description: "Emit new event every time a document is processed and parsed data is available. [See the documentation](https://docparser.com/api/)",
8-
version: "0.0.{{ts}}",
9+
version: "0.0.1",
910
type: "source",
1011
dedupe: "unique",
11-
props: {
12-
docparser,
13-
db: "$.service.db",
14-
timer: {
15-
type: "$.interface.timer",
16-
default: {
17-
intervalSeconds: 60,
18-
},
19-
},
20-
parserId: {
21-
propDefinition: [
22-
docparser,
23-
"parserId",
24-
],
25-
},
26-
},
2712
methods: {
28-
_getLastFetchedId() {
29-
return this.db.get("lastFetchedId") || null;
30-
},
31-
_setLastFetchedId(id) {
32-
this.db.set("lastFetchedId", id);
13+
...common.methods,
14+
getFunction() {
15+
return this.app.listData;
3316
},
34-
async getParsedData() {
35-
return await this.docparser._makeRequest({
36-
path: `/v2/results/${this.parserId}`,
37-
});
17+
getSummary(item) {
18+
return `New Document Parsed: ${item.file_name}`;
3819
},
3920
},
40-
hooks: {
41-
async deploy() {
42-
const parsedData = await this.getParsedData();
43-
const eventsToEmit = parsedData.slice(-50);
44-
45-
for (const event of eventsToEmit) {
46-
this.$emit(event, {
47-
id: event.id,
48-
summary: `New Document Parsed: ${event.file_name}`,
49-
ts: Date.parse(event.parsed_at),
50-
});
51-
}
52-
53-
if (eventsToEmit.length > 0) {
54-
const lastEvent = eventsToEmit[eventsToEmit.length - 1];
55-
this._setLastFetchedId(lastEvent.id);
56-
}
57-
},
58-
async activate() {
59-
console.log("Component activated");
60-
},
61-
async deactivate() {
62-
console.log("Component deactivated");
63-
},
64-
},
65-
async run() {
66-
const lastFetchedId = this._getLastFetchedId();
67-
const parsedData = await this.getParsedData();
68-
69-
for (const event of parsedData) {
70-
if (!lastFetchedId || event.id > lastFetchedId) {
71-
this.$emit(event, {
72-
id: event.id,
73-
summary: `New Document Parsed: ${event.file_name}`,
74-
ts: Date.parse(event.parsed_at),
75-
});
76-
}
77-
}
78-
79-
if (parsedData.length > 0) {
80-
const lastEvent = parsedData[parsedData.length - 1];
81-
this._setLastFetchedId(lastEvent.id);
82-
}
83-
},
21+
sampleEmit,
8422
};

0 commit comments

Comments
 (0)