Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ permissions:
id-token: write
contents: read

# Runs are grouped per workflow and git ref; with cancel-in-progress enabled,
# starting a new run cancels any run of the same group that is still in progress.
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
test_unit:
runs-on: ubuntu-latest
Expand Down
55 changes: 55 additions & 0 deletions src/hooks/custom/FixArrayParamsHook.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@

import {
type BeforeRequestContext,
BeforeRequestHook,
} from "../types.js";
import { prepareRequestHeaders } from "./utils/request.js";

/**
 * Expands a comma-separated form field into repeated `<key>[]` entries.
 *
 * When the entry stored under `key` is a string containing at least one comma,
 * the entry is removed and every non-empty, whitespace-trimmed piece is
 * appended under `${key}[]`. Entries that are missing, are not strings, or
 * contain no comma are left untouched.
 *
 * @param formData - The FormData object to modify in place.
 * @param key - The name of the field to split.
 */
function flattenArrayParameter(formData: FormData, key: string): void {
  const raw = formData.get(key);

  // Nothing to do unless the field holds a comma-separated string.
  if (typeof raw !== "string" || raw.indexOf(",") === -1) {
    return;
  }

  formData.delete(key);

  const arrayKey = `${key}[]`;
  raw.split(",").forEach((piece) => {
    const item = piece.trim();
    // Skip blanks produced by stray or trailing commas.
    if (item.length > 0) {
      formData.append(arrayKey, item);
    }
  });
}
/**
 * Represents a hook for fixing array parameters before sending a request.
 */
export class FixArrayParamsHook implements BeforeRequestHook {
  /**
   * Fixes specific array parameters in the request.
   * The SDK creates FormData with {extract_image_block_types: "a,b,c"},
   * and the server expects it to be {extract_image_block_types[]: ["a", "b", "c"]}.
   * Speakeasy will fix this upstream soon.
   *
   * Requests whose body is not form-encoded are returned unchanged, because
   * `Request.formData()` rejects for any other body type.
   *
   * @param _hookCtx - The context object for the hook, containing metadata about the request.
   * @param request - The original Request object.
   * @returns A new Request object with modified form data and headers, or the
   * original request when it does not carry a form-encoded body.
   */
  async beforeRequest(
    _hookCtx: BeforeRequestContext,
    request: Request
  ): Promise<Request> {
    // Inspect the header before cloning so that pass-through requests are
    // neither copied nor have their body consumed.
    const contentType = (request.headers.get("Content-Type") ?? "").toLowerCase();
    const hasFormBody =
      contentType.startsWith("multipart/form-data") ||
      contentType.startsWith("application/x-www-form-urlencoded");
    if (!hasFormBody) {
      return request;
    }

    // Read the body from a clone so the caller's request stays unconsumed.
    const requestClone = request.clone();
    const formData = await requestClone.formData();

    flattenArrayParameter(formData, "extract_image_block_types");

    // NOTE(review): prepareRequestHeaders is defined in ./utils/request.js;
    // presumably it drops headers tied to the old body encoding — confirm.
    const headers = prepareRequestHeaders(requestClone);

    return new Request(requestClone, {
      body: formData,
      headers: headers,
    });
  }
}
9 changes: 0 additions & 9 deletions src/hooks/custom/utils/request.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import {
EXTRACT_IMAGE_BLOCK_TYPES,
PARTITION_FORM_FILES_KEY,
PARTITION_FORM_SPLIT_PDF_PAGE_KEY,
PARTITION_FORM_STARTING_PAGE_NUMBER_KEY,
Expand Down Expand Up @@ -90,13 +89,5 @@ export async function prepareRequestBody(
startingPageNumber.toString()
);

if (formData.has(EXTRACT_IMAGE_BLOCK_TYPES)) {
newFormData.delete(EXTRACT_IMAGE_BLOCK_TYPES);
const extractImageBlockTypes = (formData.get(EXTRACT_IMAGE_BLOCK_TYPES)?.toString() || "").split(",");
for(const blockType of extractImageBlockTypes) {
newFormData.append(EXTRACT_IMAGE_BLOCK_TYPES, blockType);
}
}

return newFormData;
}
3 changes: 3 additions & 0 deletions src/hooks/registration.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { Hooks } from "./types.js";
import { LoggerHook } from "./custom/LoggerHook.js";
import { SplitPdfHook } from "./custom/SplitPdfHook.js";
import { HttpsCheckHook } from "./custom/HttpsCheckHook.js";
import { FixArrayParamsHook } from "./custom/FixArrayParamsHook.js";

/*
* This file is only ever generated once on the first generation and then is free to be modified.
Expand All @@ -19,6 +20,7 @@ export function initHooks(hooks: Hooks) {
const loggerHook = new LoggerHook();
const splitPdfHook = new SplitPdfHook();
const httpsCheckHook = new HttpsCheckHook();
const fixArrayParamsHook = new FixArrayParamsHook();

// NOTE: logger_hook should stay registered last as logs the status of
// request and whether it will be retried which can be changed by e.g. split_pdf_hook
Expand All @@ -28,6 +30,7 @@ export function initHooks(hooks: Hooks) {
hooks.registerSDKInitHook(splitPdfHook);

// Register before request hooks
hooks.registerBeforeRequestHook(fixArrayParamsHook)
hooks.registerBeforeRequestHook(splitPdfHook);

// Register after success hooks
Expand Down
68 changes: 68 additions & 0 deletions test/unit/FixArrayParamsHook.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import { readFileSync } from "fs";

import { UnstructuredClient } from "../../src";
import { PartitionResponse } from "../../src/sdk/models/operations";
import { PartitionParameters, Strategy } from "../../src/sdk/models/shared";
import { describe, it, expect, vi, beforeEach} from 'vitest';

// Unit tests for FixArrayParamsHook: the outgoing partition request must carry
// extract_image_block_types as repeated "extract_image_block_types[]" entries.
describe("FixArrayParamsHook unit tests", () => {
  beforeEach(() => {
    // Reset the mock before each test
    vi.resetAllMocks();
    // resetAllMocks does not undo vi.stubGlobal, so restore the real fetch
    // explicitly to keep a previous case's stub from leaking into the next.
    vi.unstubAllGlobals();
  });

  // Assert that array parameters are sent in the correct format
  // This should work with and without pdf splitting
  it.each([
    { splitPdfPage: false },
    { splitPdfPage: true },
  ])(
    "should send extract_image_block_types in the correct format",
    async ({ splitPdfPage }) => {
      const client = new UnstructuredClient({});

      const file = {
        content: readFileSync("test/data/layout-parser-paper-fast.pdf"),
        fileName: "test/data/layout-parser-paper-fast.pdf",
      };

      const requestParams: PartitionParameters = {
        files: file,
        strategy: Strategy.Fast,
        extractImageBlockTypes: ["a", "b", "c"],
        splitPdfPage: splitPdfPage,
      };

      // Canned successful response so no network traffic is needed.
      const fetchMock = vi.fn().mockResolvedValue(
        new Response(
          JSON.stringify([
            {
              type: "Image",
              element_id: "2fe9cbfbf0ff1bd64cc4705347dbd1d6",
              text: "This is a test",
              metadata: {},
            },
          ]),
          {
            status: 200,
            headers: { "Content-Type": "application/json" },
          }
        )
      );

      vi.stubGlobal("fetch", fetchMock);

      const res: PartitionResponse = await client.general.partition({
        partitionParameters: requestParams,
      });

      // The call must resolve with a response object.
      expect(res).toBeDefined();
      expect(fetchMock).toHaveBeenCalledTimes(1);

      // Inspect the Request that was handed to fetch.
      const request = fetchMock.mock.calls[0][0];
      const formData = await request.formData();
      const extract_image_block_types = formData.getAll(
        "extract_image_block_types[]"
      );

      expect(extract_image_block_types).toEqual(["a", "b", "c"]);
    }
  );
});