diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 62f8b7cf..2ba87b98 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -12,6 +12,10 @@ permissions: id-token: write contents: read +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: test_unit: runs-on: ubuntu-latest diff --git a/src/hooks/custom/FixArrayParamsHook.ts b/src/hooks/custom/FixArrayParamsHook.ts new file mode 100644 index 00000000..dbff0eb6 --- /dev/null +++ b/src/hooks/custom/FixArrayParamsHook.ts @@ -0,0 +1,55 @@ + +import { + type BeforeRequestContext, + BeforeRequestHook, +} from "../types.js"; +import { prepareRequestHeaders } from "./utils/request.js"; + +/** + * If the given key in FormData is present and contains a comma-separated list of values, + * split the values into separate entries with the key suffixed by "[]". + * + * @param formData - The FormData object to modify. + * @param key - The key to extract and split. + */ +function flattenArrayParameter(formData: FormData, key: string): void { + const value = formData.get(key); + if (formData && typeof value === "string" && value.includes(",")) { + formData.delete(key); + const values = value.split(",").map(v => v.trim()).filter(Boolean); + for (const v of values) { + formData.append(`${key}[]`, v); + } + } +} +/** + * Represents a hook for fixing array parameters before sending a request. + */ +export class FixArrayParamsHook implements BeforeRequestHook { + /** + * Fixes specific array parameters in the request. + * The SDK creates FormData with {extract_image_block_types: "a,b,c"}, + * and the server expects it to be {extract_image_block_types[]: ["a", "b", "c"]}. + * Speakeasy will fix this upstream soon. + * + * @param _hookCtx - The context object for the hook, containing metadata about the request. + * @param request - The original Request object. + * @returns A new Request object with modified form data and headers. 
+ */ + async beforeRequest( + _hookCtx: BeforeRequestContext, + request: Request + ): Promise<Request> { + const requestClone = request.clone(); + const formData = await requestClone.formData(); + + flattenArrayParameter(formData, "extract_image_block_types"); + + const headers = prepareRequestHeaders(requestClone); + + return new Request(requestClone, { + body: formData, + headers: headers, + }); + } +} diff --git a/src/hooks/custom/utils/request.ts b/src/hooks/custom/utils/request.ts index 03a1dfde..b8f6a65d 100644 --- a/src/hooks/custom/utils/request.ts +++ b/src/hooks/custom/utils/request.ts @@ -1,5 +1,4 @@ import { - EXTRACT_IMAGE_BLOCK_TYPES, PARTITION_FORM_FILES_KEY, PARTITION_FORM_SPLIT_PDF_PAGE_KEY, PARTITION_FORM_STARTING_PAGE_NUMBER_KEY, @@ -90,13 +89,5 @@ export async function prepareRequestBody( startingPageNumber.toString() ); - if (formData.has(EXTRACT_IMAGE_BLOCK_TYPES)) { - newFormData.delete(EXTRACT_IMAGE_BLOCK_TYPES); - const extractImageBlockTypes = (formData.get(EXTRACT_IMAGE_BLOCK_TYPES)?.toString() || "").split(","); - for(const blockType of extractImageBlockTypes) { - newFormData.append(EXTRACT_IMAGE_BLOCK_TYPES, blockType); - } - } - return newFormData; } diff --git a/src/hooks/registration.ts b/src/hooks/registration.ts index 26dba7ff..ae4f7438 100644 --- a/src/hooks/registration.ts +++ b/src/hooks/registration.ts @@ -3,6 +3,7 @@ import { Hooks } from "./types.js"; import { LoggerHook } from "./custom/LoggerHook.js"; import { SplitPdfHook } from "./custom/SplitPdfHook.js"; import { HttpsCheckHook } from "./custom/HttpsCheckHook.js"; +import { FixArrayParamsHook } from "./custom/FixArrayParamsHook.js"; /* * This file is only ever generated once on the first generation and then is free to be modified. 
@@ -19,6 +20,7 @@ export function initHooks(hooks: Hooks) { const loggerHook = new LoggerHook(); const splitPdfHook = new SplitPdfHook(); const httpsCheckHook = new HttpsCheckHook(); + const fixArrayParamsHook = new FixArrayParamsHook(); // NOTE: logger_hook should stay registered last as logs the status of // request and whether it will be retried which can be changed by e.g. split_pdf_hook @@ -28,6 +30,7 @@ export function initHooks(hooks: Hooks) { hooks.registerSDKInitHook(splitPdfHook); // Register before request hooks + hooks.registerBeforeRequestHook(fixArrayParamsHook) hooks.registerBeforeRequestHook(splitPdfHook); // Register after success hooks diff --git a/test/unit/FixArrayParamsHook.test.ts b/test/unit/FixArrayParamsHook.test.ts new file mode 100644 index 00000000..e263a1ba --- /dev/null +++ b/test/unit/FixArrayParamsHook.test.ts @@ -0,0 +1,68 @@ +import { readFileSync } from "fs"; + +import { UnstructuredClient } from "../../src"; +import { PartitionResponse } from "../../src/sdk/models/operations"; +import { PartitionParameters, Strategy } from "../../src/sdk/models/shared"; +import { describe, it, expect, vi, beforeEach} from 'vitest'; + +describe("FixArrayParamsHook unit tests", () => { + beforeEach(() => { + // Reset the mock before each test + vi.resetAllMocks(); + }); + + // Assert that array parameters are sent in the correct format + // This should work with and without pdf splitting + it.each([ + {splitPdfPage: false}, + {splitPdfPage: true}, + ])( + "should send extract_image_block_types in the correct format", async ({splitPdfPage}) => { + const client = new UnstructuredClient({}); + + const file = { + content: readFileSync("test/data/layout-parser-paper-fast.pdf"), + fileName: "test/data/layout-parser-paper-fast.pdf", + }; + + const requestParams: PartitionParameters = { + files: file, + strategy: Strategy.Fast, + extractImageBlockTypes: ["a", "b", "c"], + splitPdfPage: splitPdfPage, + }; + + const fetchMock = vi.fn().mockResolvedValue( + 
new Response( + JSON.stringify([ + { + type: "Image", + element_id: "2fe9cbfbf0ff1bd64cc4705347dbd1d6", + text: "This is a test", + metadata: {}, + }, + ]), + { + status: 200, + headers: { "Content-Type": "application/json" }, + } + ) + ); + + vi.stubGlobal("fetch", fetchMock); + + const res: PartitionResponse = await client.general.partition({ + partitionParameters: requestParams, + }); + + expect(fetchMock).toHaveBeenCalledTimes(1); + + const request = fetchMock.mock.calls[0][0]; + const formData = await request.formData(); + const extract_image_block_types = formData.getAll( + "extract_image_block_types[]" + ); + + expect(extract_image_block_types).toEqual(["a", "b", "c"]); + }); +});