diff --git a/src/hooks/custom/common.ts b/src/hooks/custom/common.ts
index 005482aa..dbeaf193 100644
--- a/src/hooks/custom/common.ts
+++ b/src/hooks/custom/common.ts
@@ -18,6 +18,9 @@ export const PARTITION_FORM_SPLIT_PDF_PAGE_RANGE_KEY = "split_pdf_page_range";
 export const PARTITION_FORM_SPLIT_PDF_CONCURRENCY_LEVEL =
   "split_pdf_concurrency_level";
 
+/** FormData key for the `extract_image_block_types` partition parameter. */
+export const EXTRACT_IMAGE_BLOCK_TYPES = "extract_image_block_types";
+
 export const DEFAULT_STARTING_PAGE_NUMBER = 1;
 export const DEFAULT_NUMBER_OF_PARALLEL_REQUESTS = 8;
 export const DEFAULT_SPLIT_PDF_ALLOW_FAILED_KEY = false;
diff --git a/src/hooks/custom/utils/request.ts b/src/hooks/custom/utils/request.ts
index 6ecf7111..dd55eb56 100644
--- a/src/hooks/custom/utils/request.ts
+++ b/src/hooks/custom/utils/request.ts
@@ -1,4 +1,5 @@
 import {
+  EXTRACT_IMAGE_BLOCK_TYPES,
   PARTITION_FORM_FILES_KEY,
   PARTITION_FORM_SPLIT_PDF_PAGE_KEY,
   PARTITION_FORM_STARTING_PAGE_NUMBER_KEY,
@@ -90,5 +91,26 @@ export async function prepareRequestBody(
     PARTITION_FORM_STARTING_PAGE_NUMBER_KEY,
     startingPageNumber.toString()
   );
+
+  // The block types may arrive as a single comma-separated entry; the loop
+  // below re-adds them as one form entry per block type.
+  if (formData.has(EXTRACT_IMAGE_BLOCK_TYPES)) {
+    newFormData.delete(EXTRACT_IMAGE_BLOCK_TYPES);
+    // getAll() rather than get(): get() returns only the first entry, which
+    // would drop values when the field was appended more than once.
+    for (const entry of formData.getAll(EXTRACT_IMAGE_BLOCK_TYPES)) {
+      if (typeof entry !== "string") {
+        continue; // a File entry cannot name a block type
+      }
+      for (const rawBlockType of entry.split(",")) {
+        const blockType = rawBlockType.trim();
+        // "".split(",") yields [""]; never forward an empty block type.
+        if (blockType !== "") {
+          newFormData.append(EXTRACT_IMAGE_BLOCK_TYPES, blockType);
+        }
+      }
+    }
+  }
+
   return newFormData;
 }
diff --git a/test/integration/SplitPdfHook.test.ts b/test/integration/SplitPdfHook.test.ts
index bad3df35..ff69505a 100644
--- a/test/integration/SplitPdfHook.test.ts
+++ b/test/integration/SplitPdfHook.test.ts
@@ -379,7 +379,8 @@ describe("SplitPDF succeeds for large PDF with high concurrency", () => {
         splitPdfPage: true,
         strategy: Strategy.HiRes,
         splitPdfAllowFailed: false,
-        splitPdfConcurrencyLevel: 15
+        splitPdfConcurrencyLevel: 15,
+        extractImageBlockTypes: ["Image", "Table"]
       };
 
       const res: PartitionResponse = await client.general.partition({