diff --git a/package-lock.json b/package-lock.json index 9ec15c72..09cfae22 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,11 +9,13 @@ "version": "0.18.0", "dependencies": { "async": "^3.2.5", - "pdf-lib": "^1.17.1" + "pdf-lib": "^1.17.1", + "uuid": "^10.0.0" }, "devDependencies": { "@types/async": "^3.2.24", "@types/jest": "^29.5.12", + "@types/uuid": "^10.0.0", "@typescript-eslint/eslint-plugin": "^7.7.1", "@typescript-eslint/parser": "^7.7.1", "eslint": "^8.57.0", @@ -1480,6 +1482,12 @@ "integrity": "sha512-9aEbYZ3TbYMznPdcdr3SmIrLXwC/AKZXQeCf9Pgao5CKb8CyHuEX5jzWPTkvregvhRJHcpRO6BFoGW9ycaOkYw==", "dev": true }, + "node_modules/@types/uuid": { + "version": "10.0.0", + "resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-10.0.0.tgz", + "integrity": "sha512-7gqG38EyHgyP1S+7+xomFtL+ZNHcKv6DwNaCZmJmo1vgMugyF3TCnXVg4t1uk89mLNwnLtnY3TpOpCOyp1/xHQ==", + "dev": true + }, "node_modules/@types/yargs": { "version": "17.0.32", "resolved": "https://registry.npmjs.org/@types/yargs/-/yargs-17.0.32.tgz", @@ -6190,6 +6198,18 @@ "punycode": "^2.1.0" } }, + "node_modules/uuid": { + "version": "10.0.0", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-10.0.0.tgz", + "integrity": "sha512-8XkAphELsDnEGrDxUOHB3RGvXz6TeuYSGEZBOjtTtPm2lwhGBjLgOzLHB63IUWfBpNucQjND6d3AOudO+H3RWQ==", + "funding": [ + "https://github.com/sponsors/broofa", + "https://github.com/sponsors/ctavan" + ], + "bin": { + "uuid": "dist/bin/uuid" + } + }, "node_modules/v8-compile-cache-lib": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz", @@ -7530,6 +7550,12 @@ "integrity": "sha512-9aEbYZ3TbYMznPdcdr3SmIrLXwC/AKZXQeCf9Pgao5CKb8CyHuEX5jzWPTkvregvhRJHcpRO6BFoGW9ycaOkYw==", "dev": true }, + "@types/uuid": { + "version": "10.0.0", + "resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-10.0.0.tgz", + "integrity": "sha512-7gqG38EyHgyP1S+7+xomFtL+ZNHcKv6DwNaCZmJmo1vgMugyF3TCnXVg4t1uk89mLNwnLtnY3TpOpCOyp1/xHQ==", + "dev": true + }, "@types/yargs": { "version": "17.0.32", "resolved": "https://registry.npmjs.org/@types/yargs/-/yargs-17.0.32.tgz", @@ -10904,6 +10930,11 @@ "punycode": "^2.1.0" } }, + "uuid": { + "version": "10.0.0", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-10.0.0.tgz", + "integrity": "sha512-8XkAphELsDnEGrDxUOHB3RGvXz6TeuYSGEZBOjtTtPm2lwhGBjLgOzLHB63IUWfBpNucQjND6d3AOudO+H3RWQ==" + }, "v8-compile-cache-lib": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz", diff --git a/package.json b/package.json index f876c207..af26afb0 100644 --- a/package.json +++ b/package.json @@ -20,6 +20,7 @@ "devDependencies": { "@types/async": "^3.2.24", "@types/jest": "^29.5.12", + "@types/uuid": "^10.0.0", "@typescript-eslint/eslint-plugin": "^7.7.1", "@typescript-eslint/parser": "^7.7.1", "eslint": "^8.57.0", @@ -32,6 +33,7 @@ }, "dependencies": { "async": "^3.2.5", - "pdf-lib": "^1.17.1" + "pdf-lib": "^1.17.1", + "uuid": "^10.0.0" } } diff --git a/src/hooks/custom/SplitPdfHook.ts b/src/hooks/custom/SplitPdfHook.ts index f6237836..87904f12 100644 --- a/src/hooks/custom/SplitPdfHook.ts +++ b/src/hooks/custom/SplitPdfHook.ts @@ -1,4 +1,5 @@ import async from "async"; +import { v4 as uuidv4 } from 'uuid'; import { AfterErrorContext, @@ -96,7 +97,11 @@ export class SplitPdfHook hookCtx: BeforeRequestContext, request: Request ): Promise { - const { operationID } = hookCtx; + + // setting the current operationID to be unique + const operationID = "partition-" + uuidv4(); + hookCtx.operationID = operationID; + const requestClone = request.clone(); const formData = await requestClone.formData(); const splitPdfPage = stringToBoolean( diff --git a/test/integration/SplitPdfHook.test.ts b/test/integration/SplitPdfHook.test.ts index ed2b431e..bad3df35 100644 --- a/test/integration/SplitPdfHook.test.ts +++ b/test/integration/SplitPdfHook.test.ts @@ -392,4 +392,78 @@ describe("SplitPDF succeeds for large PDF with high concurrency", () => { expect(res.elements?.length).toBeGreaterThan(0); }, 300000); -}); \ No newline at end of file +}); + + +describe("SplitPDF async can be used to send multiple files concurrently", () => { + const FAKE_API_KEY = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"; + + it.each([ + `${localServer}/general/v0/general`, + ])("succeed", async (serverURL) => { + const client = new UnstructuredClient({ + serverURL: serverURL, + security: { + apiKeyAuth: FAKE_API_KEY, + }, + }); + + const file = { + content: readFileSync("test/data/layout-parser-paper.pdf"), + fileName: "test/data/layout-parser-paper.pdf" + }; + + const RequestsParams = [ + { + files: file, + splitPdfPage: true, + strategy: Strategy.Fast, + splitPdfPageRange: [1, 3], + languages: ["eng"], + }, + { + files: file, + splitPdfPage: true, + strategy: Strategy.Fast, + splitPdfPageRange: [10, 12], + languages: ["eng"], + } + ]; + + // Process requests serially + const serialElements: any[][] = []; + for (const requestParams of RequestsParams) { + const res: PartitionResponse = await client.general.partition({ + partitionParameters: { + ...requestParams + }, + }); + expect(res.statusCode).toEqual(200); + expect(res.elements?.length).toBeGreaterThan(0); + if (res.elements) { + serialElements.push(res.elements); + } + } + + // Process requests concurrently + const concurrentElements: any[][] = []; + const concurrentResponses = await Promise.all(RequestsParams.map(req => + client.general.partition({ + partitionParameters: req + }) + )); + + for (const res of concurrentResponses) { + expect(res.statusCode).toEqual(200); + expect(res.elements?.length).toBeGreaterThan(0); + if (res.elements) { + concurrentElements.push(res.elements); + } + } + + const isEqual = JSON.stringify(serialElements) === JSON.stringify(concurrentElements); + expect(isEqual).toBe(true); + + }, + 300000); +});