File tree Expand file tree Collapse file tree 9 files changed +260
-7
lines changed
new-document-data-available Expand file tree Collapse file tree 9 files changed +260
-7
lines changed Original file line number Diff line number Diff line change 1+ import FormData from "form-data" ;
2+ import docparser from "../../docparser.app.mjs" ;
3+
/**
 * Action: ask Docparser to fetch a document from a remote URL and import it
 * into the selected parser.
 */
export default {
  key: "docparser-fetch-document-url",
  name: "Fetch Document by URL",
  description: "Fetches a document from a provided URL and imports it to Docparser for parsing. [See the documentation](https://docparser.com/api/)",
  version: "0.0.1",
  type: "action",
  props: {
    docparser,
    parserId: {
      propDefinition: [
        docparser,
        "parserId",
      ],
    },
    url: {
      type: "string",
      label: "Document URL",
      description: "The URL of the document to be fetched and imported into Docparser.",
    },
  },
  /**
   * Sends the URL as a multipart form field and returns the raw API response.
   */
  async run({ $ }) {
    // The request body is multipart/form-data with a single `url` field.
    const form = new FormData();
    form.append("url", this.url);

    const result = await this.docparser.fetchDocumentFromURL({
      $,
      parserId: this.parserId,
      data: form,
      // The multipart boundary lives in the headers generated by form-data.
      headers: form.getHeaders(),
    });

    $.export("$summary", `Document is scheduled to be fetched and processed. Document ID: ${result.document_id}`);
    return result;
  },
};
Original file line number Diff line number Diff line change 1+ import FormData from "form-data" ;
2+ import fs from "fs" ;
3+ import { checkTmp } from "../../common/utils.mjs" ;
4+ import docparser from "../../docparser.app.mjs" ;
5+
/**
 * Action: upload a local file (from `/tmp`) to the selected Docparser parser.
 */
export default {
  key: "docparser-upload-document",
  name: "Upload Document",
  description: "Uploads a document to docparser that initiates parsing immediately after reception. [See the documentation](https://docparser.com/api/#import-documents)",
  version: "0.0.1",
  type: "action",
  props: {
    docparser,
    parserId: {
      propDefinition: [
        docparser,
        "parserId",
      ],
    },
    file: {
      type: "string",
      label: "File",
      description: "The path to a file in the `/tmp` directory. [See the documentation on working with files](https://pipedream.com/docs/code/nodejs/working-with-files/#writing-a-file-to-tmp)",
    },
  },
  /**
   * Streams the file as a multipart `file` field and returns the raw API response.
   */
  async run({ $ }) {
    // Resolve the user-supplied name to a path under /tmp before opening it.
    const filePath = checkTmp(this.file);
    const fileStream = fs.createReadStream(filePath);

    const form = new FormData();
    form.append("file", fileStream);

    const result = await this.docparser.uploadDocument({
      $,
      parserId: this.parserId,
      data: form,
      // The multipart boundary lives in the headers generated by form-data.
      headers: form.getHeaders(),
    });

    $.export("$summary", `Successfully uploaded document. Document ID: ${result.id}`);
    return result;
  },
};
/**
 * Normalizes a user-supplied file name to a path inside `/tmp`.
 *
 * A value that is exactly `/tmp` or already starts with `/tmp/` is returned
 * unchanged. Anything else is treated as relative to `/tmp`; leading slashes
 * are stripped first so the result never contains `/tmp//`.
 *
 * @param {string} filename - File name or path provided by the user.
 * @returns {string} A path located under `/tmp`.
 */
export const checkTmp = (filename) => {
  // Match on the directory boundary: a bare startsWith("/tmp") would also
  // accept sibling paths such as "/tmpfoo/x.pdf", which are not inside /tmp.
  if (filename === "/tmp" || filename.startsWith("/tmp/")) {
    return filename;
  }
  return `/tmp/${filename.replace(/^\/+/, "")}`;
};
Original file line number Diff line number Diff line change 1+ import { axios } from "@pipedream/platform" ;
2+
/**
 * Docparser app definition: shared prop definitions plus thin wrappers around
 * the Docparser HTTP API.
 */
export default {
  type: "app",
  app: "docparser",
  propDefinitions: {
    parserId: {
      type: "string",
      label: "Parser ID",
      description: "The ID of the parser to be used.",
      // Builds the dropdown from the account's parsers.
      async options() {
        const parserList = await this.listParsers();
        return parserList.map((parser) => ({
          label: parser.label,
          value: parser.id,
        }));
      },
    },
  },
  methods: {
    /** Root URL that every request path is appended to. */
    _baseUrl() {
      return "https://api.docparser.com";
    },
    /** Basic-auth credentials: API key as username, empty password. */
    _auth() {
      const username = String(this.$auth.api_key);
      return {
        username,
        password: "",
      };
    },
    /**
     * Issues a request through the platform axios wrapper.
     * Extra options are spread last, so they can override `url` and `auth`.
     */
    _makeRequest({
      $ = this, path, ...opts
    }) {
      const config = {
        url: `${this._baseUrl()}${path}`,
        auth: this._auth(),
        ...opts,
      };
      return axios($, config);
    },
    /** Requests `/v1/results/{parserId}`. */
    listData({
      parserId, ...opts
    }) {
      const path = `/v1/results/${parserId}`;
      return this._makeRequest({
        path,
        ...opts,
      });
    },
    /** Requests `/v1/parsers`. */
    listParsers() {
      const path = "/v1/parsers";
      return this._makeRequest({
        path,
      });
    },
    /** POSTs to `/v2/document/fetch/{parserId}`. */
    fetchDocumentFromURL({
      parserId, ...opts
    }) {
      const path = `/v2/document/fetch/${parserId}`;
      return this._makeRequest({
        method: "POST",
        path,
        ...opts,
      });
    },
    /** POSTs to `/v1/document/upload/{parserId}`. */
    uploadDocument({
      parserId, ...opts
    }) {
      const path = `/v1/document/upload/${parserId}`;
      return this._makeRequest({
        method: "POST",
        path,
        ...opts,
      });
    },
  },
};
Original file line number Diff line number Diff line change 11{
22 "name" : " @pipedream/docparser" ,
3- "version" : " 0.6 .0" ,
3+ "version" : " 0.1 .0" ,
44 "description" : " Pipedream docparser Components" ,
55 "main" : " docparser.app.mjs" ,
66 "keywords" : [
1313 "access" : " public"
1414 },
1515 "dependencies" : {
16- "@pipedream/platform" : " ^3.0.0 "
16+ "@pipedream/platform" : " ^3.0.3 "
1717 }
1818}
Original file line number Diff line number Diff line change 1+ import { DEFAULT_POLLING_SOURCE_TIMER_INTERVAL } from "@pipedream/platform" ;
2+ import app from "../../docparser.app.mjs" ;
3+
/**
 * Shared base for Docparser polling sources.
 *
 * Concrete sources spread this object and must provide `getFunction()`
 * (returns the app method used to list items) and `getSummary(item)`.
 */
export default {
  props: {
    app,
    db: "$.service.db",
    timer: {
      type: "$.interface.timer",
      default: {
        intervalSeconds: DEFAULT_POLLING_SOURCE_TIMER_INTERVAL,
      },
    },
    parserId: {
      propDefinition: [
        app,
        "parserId",
      ],
    },
  },
  methods: {
    /** Returns the stored checkpoint, or the Unix epoch on the first run. */
    _getLastDate() {
      return this.db.get("lastDate") || "1970-01-01T00:00:00";
    },
    /** Persists the checkpoint used as the `date` filter on the next poll. */
    _setLastDate(lastDate) {
      this.db.set("lastDate", lastDate);
    },
    /**
     * Lists items processed after the stored checkpoint, advances the
     * checkpoint, and emits one event per item, oldest first.
     *
     * @param {number|false} [maxResults=false] - Optional cap sent as `limit`.
     */
    async emitEvent(maxResults = false) {
      const lastDate = this._getLastDate();
      const fn = this.getFunction();
      const params = {
        sort_by: "parsed_at",
        sort_order: "DESC",
        list: "processed_after",
        date: lastDate,
      };

      if (maxResults) {
        params.limit = maxResults;
      }

      const response = await fn({
        parserId: this.parserId,
        params,
      });

      if (response.length) {
        // Results are requested newest-first, so index 0 is the newest item.
        // The last 6 characters are dropped before storing.
        // NOTE(review): presumably a "+00:00" offset suffix; confirm against the API.
        const dateTime = response[0].processed_at_utc;
        this._setLastDate(dateTime.substring(0, dateTime.length - 6));
      }

      // Emit in chronological order (the list arrives newest-first).
      for (const item of response.reverse()) {
        // `created` is kept for backward compatibility, but the sample event
        // payload carries `processed_at_utc` / `uploaded_at_utc` instead;
        // without these fallbacks every event was stamped with "now".
        const itemTs = Date.parse(
          item.created || item.processed_at_utc || item.uploaded_at_utc,
        );
        this.$emit(item, {
          id: item.id,
          summary: this.getSummary(item),
          ts: Number.isNaN(itemTs)
            ? Date.now()
            : itemTs,
        });
      }
    },
  },
  hooks: {
    /** On deploy, emit at most 25 historical items. */
    async deploy() {
      await this.emitEvent(25);
    },
  },
  /** Timer-driven poll. */
  async run() {
    await this.emitEvent();
  },
};
Original file line number Diff line number Diff line change 1+ import common from "../common/base.mjs" ;
2+ import sampleEmit from "./test-event.mjs" ;
3+
/**
 * Source: emits an event for each document whose parsed data is available,
 * built on the shared polling base.
 */
export default {
  ...common,
  key: "docparser-new-document-data-available",
  name: "New Document Data Available",
  description: "Emit new event every time a document is processed and parsed data is available. [See the documentation](https://docparser.com/api/)",
  version: "0.0.1",
  type: "source",
  dedupe: "unique",
  methods: {
    ...common.methods,
    /** App method the base uses to list parsed results. */
    getFunction() {
      const { listData } = this.app;
      return listData;
    },
    /** Human-readable summary shown for each emitted event. */
    getSummary(item) {
      const { file_name: fileName } = item;
      return `New Document Parsed: ${fileName}`;
    },
  },
  sampleEmit,
};
// Sample event payload for the "New Document Data Available" source.
export default {
  id: "2b11659f161dcd26694e9403fc430cfd",
  document_id: "2b11659f161dcd26694e9403fc430cfd",
  remote_id: "",
  file_name: "file.pdf",
  media_link: "https://api.docparser.com/v1/document/media/2b11659f161dcd26694e9403fc430cfd-2b11659f161dcd26694e9403fc430cfd",
  media_link_original: "https://api.docparser.com/v1/document/media/2b11659f161dcd26694e9403fc430cfd-2b11659f161dcd26694e9403fc430cfd/original",
  media_link_data: "https://api.docparser.com/v1/document/media/2b11659f161dcd26694e9403fc430cfd-2b11659f161dcd26694e9403fc430cfd/data",
  page_count: 5,
  uploaded_at: "2025-04-08T13:32:02+00:00",
  processed_at: "2025-04-08T13:32:02+00:00",
  uploaded_at_utc: "2025-04-08T13:32:02+00:00",
  uploaded_at_user: "2025-04-08T06:32:02+00:00",
  processed_at_utc: "2025-04-08T13:32:02+00:00",
  processed_at_user: "2025-04-08T06:32:02+00:00",
};
You can’t perform that action at this time.
0 commit comments