diff --git a/components/_2markdown/_2markdown.app.mjs b/components/_2markdown/_2markdown.app.mjs
index aabe2f8b857ed..5996f3464f0b9 100644
--- a/components/_2markdown/_2markdown.app.mjs
+++ b/components/_2markdown/_2markdown.app.mjs
@@ -1,11 +1,80 @@
+import { axios } from "@pipedream/platform";
+
 export default {
   type: "app",
   app: "_2markdown",
-  propDefinitions: {},
+  propDefinitions: {
+    filePath: {
+      type: "string",
+      label: "File Path",
+      description: "The path to an HTML file in the `/tmp` directory. [See the documentation on working with files](https://pipedream.com/docs/code/nodejs/working-with-files/#writing-a-file-to-tmp)",
+    },
+  },
   methods: {
-    // this.$auth contains connected account data
-    authKeys() {
-      console.log(Object.keys(this.$auth));
+    _baseUrl() {
+      return "https://api.2markdown.com/v1";
+    },
+    /**
+     * Sends a request to the 2markdown API via the platform `axios` helper.
+     * `X-Api-Key` is spread after the caller's headers, so callers cannot override it.
+     */
+    _makeRequest({
+      $ = this,
+      path,
+      headers,
+      ...opts
+    }) {
+      return axios($, {
+        url: `${this._baseUrl()}${path}`,
+        headers: {
+          ...headers,
+          "X-Api-Key": this.$auth.api_key,
+        },
+        ...opts,
+      });
+    },
+    getJobStatus({
+      jobId, ...opts
+    }) {
+      return this._makeRequest({
+        path: `/pdf2md/${jobId}`,
+        ...opts,
+      });
+    },
+    pdfToMarkdown(opts = {}) {
+      return this._makeRequest({
+        method: "POST",
+        path: "/pdf2md",
+        ...opts,
+      });
+    },
+    urlToMarkdown(opts = {}) {
+      return this._makeRequest({
+        method: "POST",
+        path: "/url2md",
+        ...opts,
+      });
+    },
+    urlToMarkdownWithJs(opts = {}) {
+      return this._makeRequest({
+        method: "POST",
+        path: "/url2mdjs",
+        ...opts,
+      });
+    },
+    htmlFileToMarkdown(opts = {}) {
+      return this._makeRequest({
+        method: "POST",
+        path: "/file2md",
+        ...opts,
+      });
+    },
+    htmlToMarkdown(opts = {}) {
+      return this._makeRequest({
+        method: "POST",
+        path: "/html2md",
+        ...opts,
+      });
     },
   },
 };
diff --git a/components/_2markdown/actions/html-file-to-markdown/html-file-to-markdown.mjs b/components/_2markdown/actions/html-file-to-markdown/html-file-to-markdown.mjs
new file mode 100644
index 0000000000000..c5e8510e1e639
--- /dev/null
+++ b/components/_2markdown/actions/html-file-to-markdown/html-file-to-markdown.mjs
@@ -0,0 +1,37 @@
+import _2markdown from "../../_2markdown.app.mjs";
+import fs from "fs";
+import FormData from "form-data";
+
+export default {
+  key: "_2markdown-html-file-to-markdown",
+  name: "HTML File to Markdown",
+  description: "Convert an HTML file to Markdown format. [See the documentation](https://2markdown.com/docs#file2md)",
+  version: "0.0.1",
+  type: "action",
+  props: {
+    _2markdown,
+    filePath: {
+      propDefinition: [
+        _2markdown,
+        "filePath",
+      ],
+    },
+  },
+  async run({ $ }) {
+    const form = new FormData();
+
+    form.append("document", fs.createReadStream(this.filePath.includes("tmp/")
+      ? this.filePath
+      : `/tmp/${this.filePath}`));
+
+    const response = await this._2markdown.htmlFileToMarkdown({
+      $,
+      headers: form.getHeaders(),
+      data: form,
+    });
+
+    $.export("$summary", "Successfully converted HTML file to markdown.");
+
+    return response;
+  },
+};
diff --git a/components/_2markdown/actions/html-to-markdown/html-to-markdown.mjs b/components/_2markdown/actions/html-to-markdown/html-to-markdown.mjs
new file mode 100644
index 0000000000000..133616d409059
--- /dev/null
+++ b/components/_2markdown/actions/html-to-markdown/html-to-markdown.mjs
@@ -0,0 +1,29 @@
+import _2markdown from "../../_2markdown.app.mjs";
+
+export default {
+  key: "_2markdown-html-to-markdown",
+  name: "HTML to Markdown",
+  description: "Convert raw HTML content to Markdown format. [See the documentation](https://2markdown.com/docs#html2md)",
+  version: "0.0.1",
+  type: "action",
+  props: {
+    _2markdown,
+    html: {
+      type: "string",
+      label: "HTML",
+      description: "The HTML content to be converted to Markdown",
+    },
+  },
+  async run({ $ }) {
+    const response = await this._2markdown.htmlToMarkdown({
+      $,
+      data: {
+        html: this.html,
+      },
+    });
+
+    $.export("$summary", "Successfully converted HTML to markdown.");
+
+    return response;
+  },
+};
diff --git a/components/_2markdown/actions/pdf-to-markdown/pdf-to-markdown.mjs b/components/_2markdown/actions/pdf-to-markdown/pdf-to-markdown.mjs
new file mode 100644
index 0000000000000..597a80fdb6812
--- /dev/null
+++ b/components/_2markdown/actions/pdf-to-markdown/pdf-to-markdown.mjs
@@ -0,0 +1,59 @@
+import _2markdown from "../../_2markdown.app.mjs";
+import fs from "fs";
+import FormData from "form-data";
+
+export default {
+  key: "_2markdown-pdf-to-markdown",
+  name: "PDF to Markdown",
+  description: "Convert a PDF document to Markdown format. [See the documentation](https://2markdown.com/docs#pdf2md)",
+  version: "0.0.1",
+  type: "action",
+  props: {
+    _2markdown,
+    filePath: {
+      propDefinition: [
+        _2markdown,
+        "filePath",
+      ],
+      description: "The path to a PDF file in the `/tmp` directory. [See the documentation on working with files](https://pipedream.com/docs/code/nodejs/working-with-files/#writing-a-file-to-tmp)",
+    },
+    waitForCompletion: {
+      type: "boolean",
+      label: "Wait for Completion",
+      description: "Set to `true` to poll the API in 3-second intervals until the job is complete",
+      optional: true,
+    },
+  },
+  async run({ $ }) {
+    const form = new FormData();
+
+    form.append("document", fs.createReadStream(this.filePath.includes("tmp/")
+      ? this.filePath
+      : `/tmp/${this.filePath}`));
+
+    let response = await this._2markdown.pdfToMarkdown({
+      $,
+      headers: form.getHeaders(),
+      data: form,
+    });
+
+    if (this.waitForCompletion) {
+      const timer = (ms) => new Promise((res) => setTimeout(res, ms));
+      const jobId = response.jobId;
+      while (response.status === "processing" || response.status === "pending") {
+        // Sleep before each poll so no delay is spent after the job has finished.
+        await timer(3000);
+        response = await this._2markdown.getJobStatus({
+          $,
+          jobId,
+        });
+      }
+    }
+
+    $.export("$summary", `${this.waitForCompletion
+      ? "Finished"
+      : "Started"} converting PDF file to markdown.`);
+
+    return response;
+  },
+};
diff --git a/components/_2markdown/actions/url-to-markdown/url-to-markdown.mjs b/components/_2markdown/actions/url-to-markdown/url-to-markdown.mjs
new file mode 100644
index 0000000000000..f36e22581f44e
--- /dev/null
+++ b/components/_2markdown/actions/url-to-markdown/url-to-markdown.mjs
@@ -0,0 +1,40 @@
+import _2markdown from "../../_2markdown.app.mjs";
+
+export default {
+  key: "_2markdown-url-to-markdown",
+  name: "URL to Markdown",
+  description: "Extract the essential content of a website as plaintext. [See the documentation](https://2markdown.com/docs#url2md)",
+  version: "0.0.1",
+  type: "action",
+  props: {
+    _2markdown,
+    url: {
+      type: "string",
+      label: "URL",
+      description: "The URL to be processed. Costs 1 credit per request.",
+    },
+    js: {
+      type: "boolean",
+      label: "Include Javascript Support",
+      description: "Set to `true` to extract content as plaintext including any javascript-rendered resources. Costs an additional credit per request.",
+      optional: true,
+    },
+  },
+  async run({ $ }) {
+    const opts = {
+      $,
+      data: {
+        url: this.url,
+      },
+    };
+
+    // Call the method on the app object so `this` stays bound inside `_makeRequest`.
+    const response = this.js
+      ? await this._2markdown.urlToMarkdownWithJs(opts)
+      : await this._2markdown.urlToMarkdown(opts);
+
+    $.export("$summary", "Successfully extracted website content.");
+
+    return response;
+  },
+};
diff --git a/components/_2markdown/package.json b/components/_2markdown/package.json
index de2a6d7f40ec4..9df443a7fe99d 100644
--- a/components/_2markdown/package.json
+++ b/components/_2markdown/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@pipedream/_2markdown",
-  "version": "0.0.1",
+  "version": "0.1.0",
   "description": "Pipedream 2markdown Components",
   "main": "_2markdown.app.mjs",
   "keywords": [
@@ -11,5 +11,9 @@
   "author": "Pipedream (https://pipedream.com/)",
   "publishConfig": {
     "access": "public"
+  },
+  "dependencies": {
+    "@pipedream/platform": "^3.0.3",
+    "form-data": "^4.0.1"
   }
-}
\ No newline at end of file
+}
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index eb428226e9e9b..f799a7602a604 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -145,7 +145,14 @@ importers:
         specifier: ^3.0.0
         version: 3.0.3
 
-  components/_2markdown: {}
+  components/_2markdown:
+    dependencies:
+      '@pipedream/platform':
+        specifier: ^3.0.3
+        version: 3.0.3
+      form-data:
+        specifier: ^4.0.1
+        version: 4.0.1
 
   components/_360nrs:
     dependencies:
@@ -292,8 +299,7 @@ importers:
         specifier: ^3.0.1
         version: 3.0.3
 
-  components/adobe_document_generation_api:
-    specifiers: {}
+  components/adobe_document_generation_api: {}
 
   components/adobe_pdf_services:
     dependencies:
@@ -5049,8 +5055,7 @@ importers:
 
   components/humanitix: {}
 
-  components/humanlayer:
-    specifiers: {}
+  components/humanlayer: {}
 
   components/humanloop: {}
 
@@ -6521,8 +6526,7 @@ importers:
         specifier: ^3.0.3
         version: 3.0.3
 
-  components/microsoft_dynamics_365_sales:
-    specifiers: {}
+  components/microsoft_dynamics_365_sales: {}
 
   components/microsoft_entra_id:
     dependencies:
@@ -8015,8 +8019,7 @@ importers:
         specifier: ^1.6.0
         version: 1.6.6
 
-  components/planhat:
-    specifiers: {}
+  components/planhat: {}
 
   components/planly: {}
 
@@ -10510,8 +10513,7 @@ importers:
 
   components/syncro: {}
 
-  components/synthflow:
-    specifiers: {}
+  components/synthflow: {}
 
   components/t2m_url_shortener: {}
 
@@ -31763,6 +31765,8 @@ snapshots:
       '@putout/operator-filesystem': 5.0.0(putout@36.13.1(eslint@8.57.1)(typescript@5.6.3))
       '@putout/operator-json': 2.2.0
       putout: 36.13.1(eslint@8.57.1)(typescript@5.6.3)
+    transitivePeerDependencies:
+      - supports-color
 
   '@putout/operator-regexp@1.0.0(putout@36.13.1(eslint@8.57.1)(typescript@5.6.3))':
     dependencies: