Skip to content

Commit 7075033

Browse files
authored
Merge branch 'main' into fix_extract_image_block_type
2 parents 03d48cf + dbfb84c commit 7075033

File tree

4 files changed

+116
-4
lines changed

4 files changed

+116
-4
lines changed

package-lock.json

Lines changed: 32 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@
2020
"devDependencies": {
2121
"@types/async": "^3.2.24",
2222
"@types/jest": "^29.5.12",
23+
"@types/uuid": "^10.0.0",
2324
"@typescript-eslint/eslint-plugin": "^7.7.1",
2425
"@typescript-eslint/parser": "^7.7.1",
2526
"eslint": "^8.57.0",
@@ -32,6 +33,7 @@
3233
},
3334
"dependencies": {
3435
"async": "^3.2.5",
35-
"pdf-lib": "^1.17.1"
36+
"pdf-lib": "^1.17.1",
37+
"uuid": "^10.0.0"
3638
}
3739
}

src/hooks/custom/SplitPdfHook.ts

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
import async from "async";
2+
import { v4 as uuidv4 } from 'uuid';
23

34
import {
45
AfterErrorContext,
@@ -96,7 +97,11 @@ export class SplitPdfHook
9697
hookCtx: BeforeRequestContext,
9798
request: Request
9899
): Promise<Request> {
99-
const { operationID } = hookCtx;
100+
101+
// Generate a unique operationID for this request and store it on the hook context
102+
const operationID = "partition-" + uuidv4();
103+
hookCtx.operationID = operationID;
104+
100105
const requestClone = request.clone();
101106
const formData = await requestClone.formData();
102107
const splitPdfPage = stringToBoolean(

test/integration/SplitPdfHook.test.ts

Lines changed: 75 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -393,4 +393,78 @@ describe("SplitPDF succeeds for large PDF with high concurrency", () => {
393393
expect(res.elements?.length).toBeGreaterThan(0);
394394
},
395395
300000);
396-
});
396+
});
397+
398+
399+
describe("SplitPDF async can be used to send multiple files concurrently", () => {
400+
const FAKE_API_KEY = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
401+
402+
it.each([
403+
`${localServer}/general/v0/general`,
404+
])("succeed", async (serverURL) => {
405+
const client = new UnstructuredClient({
406+
serverURL: serverURL,
407+
security: {
408+
apiKeyAuth: FAKE_API_KEY,
409+
},
410+
});
411+
412+
const file = {
413+
content: readFileSync("test/data/layout-parser-paper.pdf"),
414+
fileName: "test/data/layout-parser-paper.pdf"
415+
};
416+
417+
const RequestsParams = [
418+
{
419+
files: file,
420+
splitPdfPage: true,
421+
strategy: Strategy.Fast,
422+
splitPdfPageRange: [1, 3],
423+
languages: ["eng"],
424+
},
425+
{
426+
files: file,
427+
splitPdfPage: true,
428+
strategy: Strategy.Fast,
429+
splitPdfPageRange: [10, 12],
430+
languages: ["eng"],
431+
}
432+
];
433+
434+
// Process requests serially
435+
const serialElements: any[][] = [];
436+
for (const requestParams of RequestsParams) {
437+
const res: PartitionResponse = await client.general.partition({
438+
partitionParameters: {
439+
...requestParams
440+
},
441+
});
442+
expect(res.statusCode).toEqual(200);
443+
expect(res.elements?.length).toBeGreaterThan(0);
444+
if (res.elements) {
445+
serialElements.push(res.elements);
446+
}
447+
}
448+
449+
// Process requests concurrently
450+
const concurrentElements: any[][] = [];
451+
const concurrentResponses = await Promise.all(RequestsParams.map(req =>
452+
client.general.partition({
453+
partitionParameters: req
454+
})
455+
));
456+
457+
for (const res of concurrentResponses) {
458+
expect(res.statusCode).toEqual(200);
459+
expect(res.elements?.length).toBeGreaterThan(0);
460+
if (res.elements) {
461+
concurrentElements.push(res.elements);
462+
}
463+
}
464+
465+
const isEqual = JSON.stringify(serialElements) === JSON.stringify(concurrentElements);
466+
expect(isEqual).toBe(true);
467+
468+
},
469+
300000);
470+
});

0 commit comments

Comments
 (0)