From af5dd52d598266bf60985e0b270307a4a2d4e8df Mon Sep 17 00:00:00 2001 From: Yuming Long Date: Wed, 25 Sep 2024 15:18:18 -0700 Subject: [PATCH 1/3] without forcing file extension --- src/hooks/custom/utils/pdf.ts | 7 +++---- test/unit/utils/pdf.test.ts | 34 +++++++++++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/src/hooks/custom/utils/pdf.ts b/src/hooks/custom/utils/pdf.ts index c95f508e..b65c9494 100644 --- a/src/hooks/custom/utils/pdf.ts +++ b/src/hooks/custom/utils/pdf.ts @@ -98,8 +98,7 @@ export async function splitPdf( } /** - * Checks if the given file is a PDF. First it checks the `.pdf` file extension, then - * it tries to load the file as a PDF using the `PDFDocument.load` method. + * Checks if the given file is a PDF by loading the file as a PDF using the `PDFDocument.load` method. * @param file - The file to check. * @returns A promise that resolves to three values, first is a boolean representing * whether there was an error during PDF load, second is a PDFDocument object or null @@ -109,8 +108,8 @@ export async function splitPdf( export async function loadPdf( file: File | null ): Promise<[boolean, PDFDocument | null, number]> { - if (!file?.name.endsWith(".pdf")) { - console.info("Given file is not a PDF, so splitting is not enabled."); + if (!file) { + console.info("Given file is null, so splitting is not enabled."); return [true, null, 0]; } diff --git a/test/unit/utils/pdf.test.ts b/test/unit/utils/pdf.test.ts index 7e9fdf5a..caae7acc 100644 --- a/test/unit/utils/pdf.test.ts +++ b/test/unit/utils/pdf.test.ts @@ -97,7 +97,7 @@ describe("Pdf utility functions", () => { }); describe("loadPdf", () => { - it("should return true, null, and 0 if the file is not a PDF", async () => { + it("should return true, null, and 0 if the file is null", async () => { const result = await loadPdf(null); expect(result).toEqual([true, null, 0]); @@ -115,6 +115,19 @@ describe("Pdf utility functions", () => { expect(file.content).not.toHaveBeenCalled(); }); + it("should return true, null, and 0 if the file is not a PDF without basing on file extension", async () => { + const file = { + name: "uuid1234", + content: jest.fn().mockResolvedValue(new ArrayBuffer(0)), + }; + + const result = await loadPdf(file as any); + + expect(result).toEqual([true, null, 0]); + expect(file.content).not.toHaveBeenCalled(); + }); + + it("should return true, null, and 0 if there is an error while loading the PDF", async () => { const file = { name: "document.pdf", @@ -143,5 +156,24 @@ describe("Pdf utility functions", () => { expect(loadMock).toHaveBeenCalledTimes(1); expect(loadMock).toHaveBeenCalledWith(f.arrayBuffer()); }); + + it("should return false, PDFDocument object, and the number of pages if the PDF is loaded successfully without basing on file extension", async () => { + const file = readFileSync("test/data/layout-parser-paper-fast.pdf"); + const f = { + name: "uuid1234", + arrayBuffer: () => file.buffer, + }; + + jest.clearAllMocks(); // Reset Mocks Between Tests + const loadMock = jest.spyOn(PDFDocument, "load"); + + const [error, _, pages] = await loadPdf(f as any); + + expect(error).toBeFalsy(); + expect(pages).toEqual(2); + expect(loadMock).toHaveBeenCalledTimes(1); + expect(loadMock).toHaveBeenCalledWith(f.arrayBuffer()); + }); + }); }); From 8f26e64eb220136b6d3a5758bc6251c15a5bf0ae Mon Sep 17 00:00:00 2001 From: Yuming Long Date: Thu, 26 Sep 2024 15:18:31 -0700 Subject: [PATCH 2/3] move error/warning log to info --- src/hooks/custom/utils/pdf.ts | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/hooks/custom/utils/pdf.ts b/src/hooks/custom/utils/pdf.ts index b65c9494..e984de6d 100644 --- a/src/hooks/custom/utils/pdf.ts +++ b/src/hooks/custom/utils/pdf.ts @@ -119,10 +119,7 @@ export async function loadPdf( const pagesCount = pdf.getPages().length; return [false, pdf, pagesCount]; } catch (e) { - console.error(e); - console.warn( - "Attempted to interpret file as pdf, but error arose when splitting by pages. Reverting to non-split pdf handling path." - ); + console.info("Loading PDF failed, so splitting is not enabled."); return [true, null, 0]; } } From 8577e5e0f346b70f6281ab86cd73418003d6d6a2 Mon Sep 17 00:00:00 2001 From: Yuming Long Date: Thu, 26 Sep 2024 16:18:21 -0700 Subject: [PATCH 3/3] remove log at all --- src/hooks/custom/utils/pdf.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/hooks/custom/utils/pdf.ts b/src/hooks/custom/utils/pdf.ts index e984de6d..7c28e95c 100644 --- a/src/hooks/custom/utils/pdf.ts +++ b/src/hooks/custom/utils/pdf.ts @@ -119,7 +119,6 @@ export async function loadPdf( const pagesCount = pdf.getPages().length; return [false, pdf, pagesCount]; } catch (e) { - console.info("Loading PDF failed, so splitting is not enabled."); return [true, null, 0]; } }