Skip to content

Commit bfe73b1

Browse files
authored
fix/ensure array params are sent to the server correctly (#151)
Fixes #135. The SDK sends the wrong form data when the user adds an array param. For instance, the following call:
```
client.general.partition({ partitionParameters: { files: { content: data, fileName: filename }, extractImageBlockTypes: ["Image", "Table"], } })
```
sends the list as one comma-separated FormData value: `extract_image_block_types: "Image,Table"`. The server will ignore this unless it's sent with multiple keys, like so: `extract_image_block_types[]: "Image", extract_image_block_types[]: "Table"`. We addressed this before in #122 by adjusting the request body before sending it, but that code path only runs for PDF splitting. A better solution is to add a new hook that will clean up the form data for every request. Then we can remove the modification in the splitting code.
1 parent 6c0ae3d commit bfe73b1

File tree

5 files changed

+130
-9
lines changed

5 files changed

+130
-9
lines changed

.github/workflows/ci.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@ permissions:
1212
id-token: write
1313
contents: read
1414

15+
concurrency:
16+
group: ${{ github.workflow }}-${{ github.ref }}
17+
cancel-in-progress: true
18+
1519
jobs:
1620
test_unit:
1721
runs-on: ubuntu-latest
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
2+
import {
3+
type BeforeRequestContext,
4+
BeforeRequestHook,
5+
} from "../types.js";
6+
import { prepareRequestHeaders } from "./utils/request.js";
7+
8+
/**
 * Rewrite a comma-separated form field as repeated `key[]` entries.
 *
 * Every string entry stored under `key` is split on commas, trimmed, and
 * stripped of empty items. The resulting values are appended under
 * `${key}[]` and the original `key` entries are removed. The FormData is
 * modified in place.
 *
 * Nothing is changed when the key is absent, when any entry under the key is
 * not a string (e.g. a file), or when no entry contains a comma.
 * NOTE(review): a lone value without a comma therefore stays under the plain
 * key - confirm the server accepts that form for single-item lists.
 *
 * @param formData - The FormData object to modify.
 * @param key - The key to extract and split.
 */
function flattenArrayParameter(formData: FormData, key: string): void {
  // Read every entry: get() only returns the first one, while delete() below
  // removes them all, so relying on get() would silently drop later entries.
  const entries = formData.getAll(key);

  const rawValues: string[] = [];
  for (const entry of entries) {
    if (typeof entry !== "string") {
      // Never discard a non-string (file) entry; leave the form untouched.
      return;
    }
    rawValues.push(entry);
  }

  if (!rawValues.some((raw) => raw.includes(","))) {
    return;
  }

  const values = rawValues.flatMap((raw) =>
    raw
      .split(",")
      .map((item) => item.trim())
      .filter(Boolean)
  );

  formData.delete(key);
  for (const item of values) {
    formData.append(`${key}[]`, item);
  }
}
25+
/**
 * Before-request hook that rewrites array-valued form parameters into the
 * repeated `key[]` form before the request is sent.
 */
export class FixArrayParamsHook implements BeforeRequestHook {
  /**
   * Fixes specific array parameters in the request.
   * The SDK creates FormData with {extract_image_block_types: "a,b,c"},
   * and the server expects it to be {extract_image_block_types[]: ["a", "b", "c"]}.
   * Speakeasy will fix this upstream soon.
   *
   * Requests whose Content-Type is not a form encoding are returned
   * unchanged, because reading their body as form data would reject.
   *
   * @param _hookCtx - The context object for the hook, containing metadata about the request.
   * @param request - The original Request object.
   * @returns The original Request when it carries no form body; otherwise a
   *   new Request object with modified form data and headers.
   */
  async beforeRequest(
    _hookCtx: BeforeRequestContext,
    request: Request
  ): Promise<Request> {
    // Request.formData() only succeeds for these two encodings; anything else
    // (no body, JSON, ...) has no form fields to fix, so pass it through.
    const contentType =
      request.headers.get("Content-Type")?.toLowerCase() ?? "";
    const hasFormBody =
      contentType.includes("multipart/form-data") ||
      contentType.includes("application/x-www-form-urlencoded");
    if (!hasFormBody) {
      return request;
    }

    // Work on a clone so the caller's request body is left unread.
    const requestClone = request.clone();
    const formData = await requestClone.formData();

    flattenArrayParameter(formData, "extract_image_block_types");

    // NOTE(review): prepareRequestHeaders lives in ./utils/request.js and is
    // presumably responsible for dropping headers tied to the old body -
    // confirm against its implementation.
    const headers = prepareRequestHeaders(requestClone);

    return new Request(requestClone, {
      body: formData,
      headers: headers,
    });
  }
}

src/hooks/custom/utils/request.ts

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import {
2-
EXTRACT_IMAGE_BLOCK_TYPES,
32
PARTITION_FORM_FILES_KEY,
43
PARTITION_FORM_SPLIT_PDF_PAGE_KEY,
54
PARTITION_FORM_STARTING_PAGE_NUMBER_KEY,
@@ -90,13 +89,5 @@ export async function prepareRequestBody(
9089
startingPageNumber.toString()
9190
);
9291

93-
if (formData.has(EXTRACT_IMAGE_BLOCK_TYPES)) {
94-
newFormData.delete(EXTRACT_IMAGE_BLOCK_TYPES);
95-
const extractImageBlockTypes = (formData.get(EXTRACT_IMAGE_BLOCK_TYPES)?.toString() || "").split(",");
96-
for(const blockType of extractImageBlockTypes) {
97-
newFormData.append(EXTRACT_IMAGE_BLOCK_TYPES, blockType);
98-
}
99-
}
100-
10192
return newFormData;
10293
}

src/hooks/registration.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { Hooks } from "./types.js";
33
import { LoggerHook } from "./custom/LoggerHook.js";
44
import { SplitPdfHook } from "./custom/SplitPdfHook.js";
55
import { HttpsCheckHook } from "./custom/HttpsCheckHook.js";
6+
import { FixArrayParamsHook } from "./custom/FixArrayParamsHook.js";
67

78
/*
89
* This file is only ever generated once on the first generation and then is free to be modified.
@@ -19,6 +20,7 @@ export function initHooks(hooks: Hooks) {
1920
const loggerHook = new LoggerHook();
2021
const splitPdfHook = new SplitPdfHook();
2122
const httpsCheckHook = new HttpsCheckHook();
23+
const fixArrayParamsHook = new FixArrayParamsHook();
2224

2325
// NOTE: logger_hook should stay registered last as it logs the status of
2426
// request and whether it will be retried which can be changed by e.g. split_pdf_hook
@@ -28,6 +30,7 @@ export function initHooks(hooks: Hooks) {
2830
hooks.registerSDKInitHook(splitPdfHook);
2931

3032
// Register before request hooks
33+
hooks.registerBeforeRequestHook(fixArrayParamsHook)
3134
hooks.registerBeforeRequestHook(splitPdfHook);
3235

3336
// Register after success hooks
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
import { readFileSync } from "fs";
2+
3+
import { UnstructuredClient } from "../../src";
4+
import { PartitionResponse } from "../../src/sdk/models/operations";
5+
import { PartitionParameters, Strategy } from "../../src/sdk/models/shared";
6+
import { describe, it, expect, vi, beforeEach} from 'vitest';
7+
8+
describe("FixArrayParamsHook unit tests", () => {
  beforeEach(() => {
    // Reset the mocks before each test. resetAllMocks() does not undo
    // vi.stubGlobal(), so the stubbed fetch is restored explicitly to keep
    // one case's fake fetch from leaking into the next.
    vi.resetAllMocks();
    vi.unstubAllGlobals();
  });

  // Assert that array parameters are sent in the correct format.
  // This should work with and without pdf splitting.
  it.each([
    { splitPdfPage: false },
    { splitPdfPage: true },
  ])(
    "should send extract_image_block_types in the correct format",
    async ({ splitPdfPage }) => {
      const client = new UnstructuredClient({});

      const file = {
        content: readFileSync("test/data/layout-parser-paper-fast.pdf"),
        fileName: "test/data/layout-parser-paper-fast.pdf",
      };

      const requestParams: PartitionParameters = {
        files: file,
        strategy: Strategy.Fast,
        extractImageBlockTypes: ["a", "b", "c"],
        splitPdfPage: splitPdfPage,
      };

      // Canned successful response so no network call is made.
      const fetchMock = vi.fn().mockResolvedValue(
        new Response(
          JSON.stringify([
            {
              type: "Image",
              element_id: "2fe9cbfbf0ff1bd64cc4705347dbd1d6",
              text: "This is a test",
              metadata: {},
            },
          ]),
          {
            status: 200,
            headers: { "Content-Type": "application/json" },
          }
        )
      );

      vi.stubGlobal("fetch", fetchMock);

      const res: PartitionResponse = await client.general.partition({
        partitionParameters: requestParams,
      });

      expect(fetchMock).toHaveBeenCalledTimes(1);

      // Inspect the outgoing request: the list must arrive as repeated
      // "extract_image_block_types[]" entries, one per value.
      const request = fetchMock.mock.calls[0][0];
      const formData = await request.formData();
      const extract_image_block_types = formData.getAll(
        "extract_image_block_types[]"
      );

      expect(extract_image_block_types).toEqual(["a", "b", "c"]);
    }
  );
});

0 commit comments

Comments
 (0)