From 03d48cf2736a768c8aaa26f9cb040d1f25cb07b3 Mon Sep 17 00:00:00 2001 From: Jordan Homan Date: Tue, 1 Oct 2024 15:35:52 -0400 Subject: [PATCH] fixed input to extractImageBlockTypes to be sent to the server properly --- src/hooks/custom/common.ts | 2 ++ src/hooks/custom/utils/request.ts | 10 ++++++++++ test/integration/SplitPdfHook.test.ts | 3 ++- 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/hooks/custom/common.ts b/src/hooks/custom/common.ts index 005482aa..dbeaf193 100644 --- a/src/hooks/custom/common.ts +++ b/src/hooks/custom/common.ts @@ -18,6 +18,8 @@ export const PARTITION_FORM_SPLIT_PDF_PAGE_RANGE_KEY = "split_pdf_page_range"; export const PARTITION_FORM_SPLIT_PDF_CONCURRENCY_LEVEL = "split_pdf_concurrency_level"; +export const EXTRACT_IMAGE_BLOCK_TYPES = "extract_image_block_types"; + export const DEFAULT_STARTING_PAGE_NUMBER = 1; export const DEFAULT_NUMBER_OF_PARALLEL_REQUESTS = 8; export const DEFAULT_SPLIT_PDF_ALLOW_FAILED_KEY = false; diff --git a/src/hooks/custom/utils/request.ts b/src/hooks/custom/utils/request.ts index 6ecf7111..dd55eb56 100644 --- a/src/hooks/custom/utils/request.ts +++ b/src/hooks/custom/utils/request.ts @@ -1,4 +1,5 @@ import { + EXTRACT_IMAGE_BLOCK_TYPES, PARTITION_FORM_FILES_KEY, PARTITION_FORM_SPLIT_PDF_PAGE_KEY, PARTITION_FORM_STARTING_PAGE_NUMBER_KEY, @@ -90,5 +91,14 @@ export async function prepareRequestBody( PARTITION_FORM_STARTING_PAGE_NUMBER_KEY, startingPageNumber.toString() ); + + if (formData.has(EXTRACT_IMAGE_BLOCK_TYPES)) { + newFormData.delete(EXTRACT_IMAGE_BLOCK_TYPES); + const extractImageBlockTypes = (formData.get(EXTRACT_IMAGE_BLOCK_TYPES)?.toString() || "").split(","); + for(const blockType of extractImageBlockTypes) { + newFormData.append(EXTRACT_IMAGE_BLOCK_TYPES, blockType); + } + } + return newFormData; } diff --git a/test/integration/SplitPdfHook.test.ts b/test/integration/SplitPdfHook.test.ts index ed2b431e..f3aa856a 100644 --- a/test/integration/SplitPdfHook.test.ts +++ 
b/test/integration/SplitPdfHook.test.ts @@ -379,7 +379,8 @@ describe("SplitPDF succeeds for large PDF with high concurrency", () => { splitPdfPage: true, strategy: Strategy.HiRes, splitPdfAllowFailed: false, - splitPdfConcurrencyLevel: 15 + splitPdfConcurrencyLevel: 15, + extractImageBlockTypes: ["Image", "Table"] }; const res: PartitionResponse = await client.general.partition({